I am new to SQLite and wondering how to create a backup for a database. On a similar site I found a question about creating a backup for a database, but I am having problems getting it to work.
This is the question: https://codereview.stackexchange.com/questions/78643/create-sqlite-backups
This is the code:
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import sqlite3
import shutil
import time
import os

NO_OF_DAYS = 7


def sqlite3_backup(dbfile, backupdir):
    dbfile = Accounts.db
    backupdir = r"E:\TESTING\BackUp.db"
    """Create timestamped database copy"""
    if not os.path.isdir(backupdir):
        raise Exception("Backup directory does not exist: {}".format(backupdir))
    backup_file = r"E:\TESTING\BackUp.db" + time.strftime("-%Y%m%d-%H%M%S")
    connection = sqlite3.connect(r"E:\TESTING\Accounts.db")
    cursor = connection.cursor()
    # Lock database before making a backup
    cursor.execute('begin immediate')
    # Make new backup file
    shutil.copyfile(dbfile, backup_file)
    print("\nCreating {}...".format(backup_file))
    # Unlock database
    connection.rollback()


def clean_data(backup_dir):
    backup_dir = r"E:\TESTING\BackUp.db"
    print("\n------------------------------")
    print("Cleaning up old backups")
    for filename in os.listdir(backup_dir):
        backup_file = os.path.join(backup_dir, filename)
        if os.stat(backup_file).st_ctime < (time.time() - NO_OF_DAYS * 86400):
            if os.path.isfile(backup_file):
                os.remove(backup_file)
                print("Deleting {}...".format(backup_file))


def get_arguments():
    ## connection = sqlite3.connect(r"E:\TESTING\Accounts.db")
    ## cursor = connection.cursor()
    backup_dir = r"E:\TESTING\BackUp.db"
    db_file = sqlite3.connect(r"E:\TESTING\Accounts.db")
    """Parse the commandline arguments from the user"""
    parser = argparse.ArgumentParser()
    parser.add_argument('db_file',
                        help='the database file that needs backed up')
    parser.add_argument('backup_dir',
                        help='the directory where the backup '
                             'file should be saved')
    return parser.parse_args()


if __name__ == "__main__":
    #args = get_arguments()
    dbfile = Accounts
    backup_dir = "E:\TESTING"
    #sqlite3_backup(args.db_file, args.backup_dir)
    sqlite3_backup(db_file, backup_dir)
    clean_data(args.backup_dir)
    print("\nBackup update has been successful.")
When I run the code I get this error:
usage: backup.py [-h] db_file backup_dir
backup.py: error: the following arguments are required: db_file, backup_dir
I have substituted the db_file and the backup_dir into the code, but it still fails with the same error.
You may need to change this part:
if __name__ == "__main__":
    #args = get_arguments()
    dbfile = <<YOUR DB FILE NAME>>
    backup_dir = <<YOUR BACK UP DIRECTORY PATH>>
    #sqlite3_backup(args.db_file, args.backup_dir)
    sqlite3_backup(db_file, backup_dir)
    # CHANGE clean_data(args.backup_dir)
    # TO:
    clean_data(backup_dir)
    print("\nBackup update has been successful.")
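As an aside: if you are on Python 3.7 or newer, the sqlite3 module has a built-in Connection.backup() method that safely copies a live database, which avoids the begin immediate / shutil.copyfile approach entirely. A minimal sketch (the paths are placeholders):

import os
import sqlite3
import time

def sqlite3_backup(dbfile, backupdir):
    """Copy dbfile into backupdir under a timestamped name."""
    backup_file = os.path.join(
        backupdir, "backup" + time.strftime("-%Y%m%d-%H%M%S") + ".db")
    src = sqlite3.connect(dbfile)
    dst = sqlite3.connect(backup_file)
    with dst:
        src.backup(dst)  # copies the whole database, even while in use
    dst.close()
    src.close()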
I have a proof-of-concept script based on the watchdog module: it registers when a new file is added to a set folder and sends off a command. The script runs constantly, but the final design is to be deployed on a server, meaning we will not have access to the command line to CTRL+C it. How do I kill it from an outside source (e.g. a second script that activates a function within the primary script)?
Here is my current script, which contains a "stop_watchdog" function at the bottom.
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import os, sys, time
import sqlite3

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

if __name__ == "__main__":
    patterns = ["*"]
    ignore_patterns = None
    ignore_directories = False
    case_sensitive = True
    my_event_handler = PatternMatchingEventHandler(patterns, ignore_patterns, ignore_directories, case_sensitive)

    def file_detected(textInput):
        str(textInput)
        if ".txt" not in textInput:
            conn = sqlite3.connect(textInput)  # You can create a new database by changing the name within the quotes
            c = conn.cursor()  # The database will be saved in the location where your 'py' file is saved
            c.execute("SELECT * FROM sqlite_master where type = 'table'")
            ##print(c.fetchall())
            textTest = "{}.txt".format(textInput)
            f = open(textTest, "w")
            f.write(str(c.fetchall()))
            f.close()

    def on_created(event):
        print(f"hey, {event.src_path} has been created!")
        file_detected(event.src_path)
        ##test("{event.src_path}", shell=True)

    my_event_handler.on_created = on_created

    path = "./xyz"
    go_recursively = True
    file_observer = Observer()
    file_observer.schedule(my_event_handler, path, recursive=go_recursively)
    file_observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        file_observer.stop()
        file_observer.join()

    def stop_watchdog():
        print(f"Quitting!")
        file_observer.stop()
        sys.exit()
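One possible approach (a sketch, not a tested solution; the PID-file path is an assumption): have the watcher record its own process ID and install a SIGTERM handler that performs the same clean shutdown as CTRL+C, so a second script can stop it from outside.

import os
import signal
import sys

# In the watcher script, next to the observer setup: record our PID
# so another process can find us.
with open("/tmp/watcher.pid", "w") as f:  # hypothetical PID file path
    f.write(str(os.getpid()))

def handle_sigterm(signum, frame):
    # Same clean shutdown the KeyboardInterrupt branch performs.
    print("Quitting!")
    file_observer.stop()
    file_observer.join()
    sys.exit(0)

signal.signal(signal.SIGTERM, handle_sigterm)

The second script then only needs to read the PID file and call os.kill(pid, signal.SIGTERM) (Linux/Unix assumed).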
I'm trying to customise the SFTPOperator to download multiple files from a server. I know that the original SFTPOperator only allows one file at a time.
I copied the code from the source and tweaked it by adding a new function called get_xml_from_source(). Please refer to the code below:
def get_xml_from_source(sftp_client, remote_filepath, local_filepath, prev_execution_date, execution_date):
    """
    Copy from Source to local path
    """
    files_attr = sftp_client.listdir_attr(remote_filepath)  # eg: /source/ HITTING ERROR HERE
    files_name = sftp_client.listdir(remote_filepath)  # eg: /source/
    today_midnight = datetime.combine(datetime.today(), time.min)
    yesterday_midnight = today_midnight - timedelta(days=1)
    for file_attr, file_name in zip(files_attr, files_name):
        modified_time = datetime.fromtimestamp(file_attr.st_mtime)
        if yesterday_midnight <= modified_time < today_midnight:
        # if prev_execution_date <= modified_time < execution_date:
            try:
                # Download to local path
                sftp_client.get(remote_filepath, local_filepath)
                print(file_name)
            except:  # pylint: disable=bare-except
                print("File not found")
        else:
            print("Not the file!")
This function is meant to download only the files modified between yesterday and today.
I added the function at this line:
with self.ssh_hook.get_conn() as ssh_client:
    sftp_client = ssh_client.open_sftp()
    if self.operation.lower() == SFTPOperation.GET:
        local_folder = os.path.dirname(self.local_filepath)
        if self.create_intermediate_dirs:
            # Create intermediate directories if they don't exist
            try:
                os.makedirs(local_folder)
            except OSError:
                if not os.path.isdir(local_folder):
                    raise
        file_msg = "from {0} to {1}".format(self.remote_filepath,
                                            self.local_filepath)
        self.log.info("Starting to transfer %s", file_msg)
        # This is where it starts to copy; customization begins here
        # sftp_client.get(self.remote_filepath, self.local_filepath) <--- Original code that I commented out and replaced with mine below
        get_xml_from_source(sftp_client, self.remote_filepath,
                            self.local_filepath, self.prev_execution_date, self.execution_date)
Note that the rest of the code is unchanged; it is exactly how it looks in the source.
I keep hitting an error at files_attr = sftp_client.listdir_attr(remote_filepath) with this message:
Error while transferring from /source/ to
/path/to/destination, error: [Errno 2] No such file.
This obviously means it can't find the SFTP directory, yet when I run the whole function locally, it works fine.
Is there any part of the code that ties the paramiko connection to getting only one file? I checked the paramiko connection for the SFTPOperator; it should be fine. In this case, how should I fix it?
This is how I established my connection when running locally:
def connect_to_source():
    """
    Get source credentials
    :param: None
    :return: username & password
    """
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    username, password = get_eet_credentials()
    # key = paramiko.RSAKey.from_private_key_file(openssh_key, password=password)
    ssh.connect(hostname=SFTP_SERVER, port=SFTP_PORT_NUMBER,
                username=username, password=password)
    client = ssh.open_sftp()
    print("Connection to source success!")
    return client
Lastly, below is my Airflow task:
def copy_from_source():
    """
    Copy XML file from source to local path
    """
    return SFTPOperator(
        task_id="copy_from_source",
        ssh_conn_id="source_conn",
        local_filepath=f"{current_dir}/destination",
        remote_filepath="/source/",
        prev_execution_date='{{ prev_execution_date }}',
        execution_date='{{ execution_date }}',  # strftime("%Y-%m-%d %H:%M:%S")
        create_intermediate_dirs=True,
        operation="get",
        dag=dag
    )
I'm trying to do something similar to you. I'm not sure what is causing the issues you are facing, but this is the updated SFTP operator I have written that gets multiple files from a server:
sftp_get_multiple_files_operator.py
import os
from pathlib import Path
from typing import Any

from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.contrib.hooks import SSHHook


class SFTPGetMultipleFilesOperator(BaseOperator):
    template_fields = ('local_directory', 'remote_filename_pattern', 'remote_host')

    def __init__(
        self,
        *,
        ssh_hook=None,
        ssh_conn_id=None,
        remote_host=None,
        local_directory=None,
        remote_filename_pattern=None,
        filetype=None,
        confirm=True,
        create_intermediate_dirs=False,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.ssh_hook = ssh_hook
        self.ssh_conn_id = ssh_conn_id
        self.remote_host = remote_host
        self.local_directory = local_directory
        self.filetype = filetype
        self.remote_filename_pattern = remote_filename_pattern
        self.confirm = confirm
        self.create_intermediate_dirs = create_intermediate_dirs

    def execute(self, context: Any) -> str:
        file_msg = None
        try:
            if self.ssh_conn_id:
                if self.ssh_hook and isinstance(self.ssh_hook, SSHHook):
                    self.log.info("ssh_conn_id is ignored when ssh_hook is provided.")
                else:
                    self.log.info(
                        "ssh_hook is not provided or invalid. Trying ssh_conn_id to create SSHHook."
                    )
                    self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("Cannot operate without ssh_hook or ssh_conn_id.")

            if self.remote_host is not None:
                self.log.info(
                    "remote_host is provided explicitly. "
                    "It will replace the remote_host which was defined "
                    "in ssh_hook or predefined in connection of ssh_conn_id."
                )
                self.ssh_hook.remote_host = self.remote_host

            with self.ssh_hook.get_conn() as ssh_client:
                sftp_client = ssh_client.open_sftp()
                all_files = sftp_client.listdir()
                self.log.info(f'Found {len(all_files)} files on server')
                timestamp = context['ds_nodash']
                filename_pattern = self.remote_filename_pattern + timestamp

                # fetch all files for the run date that match the filename pattern
                matching_files = [f for f in all_files
                                  if f.find(filename_pattern) != -1]

                # if a file type is specified, filter the matching files for it
                if self.filetype is not None:
                    matching_files = [filename for filename in matching_files
                                      if filename[-len(self.filetype):] == self.filetype]
                self.log.info(f'Found {len(matching_files)} files with name including {filename_pattern}')

                local_folder = os.path.dirname(self.local_directory)
                if self.create_intermediate_dirs:
                    Path(local_folder).mkdir(parents=True, exist_ok=True)

                for f in matching_files:
                    self.log.info(f"Starting to transfer from /{f} to {self.local_directory}/{f}")
                    sftp_client.get(f'/{f}', f'{self.local_directory}/{f}')
        except Exception as e:
            raise AirflowException(f"Error while transferring {file_msg}, error: {str(e)}")

        return self.local_directory


def _make_intermediate_dirs(sftp_client, remote_directory) -> None:
    """
    Create all the intermediate directories on a remote host.

    :param sftp_client: A Paramiko SFTP client.
    :param remote_directory: Absolute path of the directory containing the file
    :return: None
    """
    if remote_directory == '/':
        sftp_client.chdir('/')
        return
    if remote_directory == '':
        return
    try:
        sftp_client.chdir(remote_directory)
    except OSError:
        dirname, basename = os.path.split(remote_directory.rstrip('/'))
        _make_intermediate_dirs(sftp_client, dirname)
        sftp_client.mkdir(basename)
        sftp_client.chdir(basename)
        return
dag.py
sftp_report = SFTPGetMultipleFilesOperator(
    task_id="sftp_reports_to_gcs",
    ssh_conn_id="sftp_connection",
    local_directory='/opt/airflow/dags/reports',
    remote_filename_pattern='reportname_',  # ds_nodash is added in the operator by accessing the Airflow context
    create_intermediate_dirs=True,
    filetype='.csv'
)
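As an aside on the original traceback (an observation, not something tested against that setup): paramiko's SFTPClient.get() copies one remote file to one local file, so calling sftp_client.get(remote_filepath, local_filepath) with two directory paths will fail even once listdir_attr() succeeds. Inside the loop, something along these lines is probably what was intended (a sketch using the question's variable names):

import os
import posixpath  # remote SFTP paths are POSIX-style

# build per-file source and destination paths before downloading
sftp_client.get(
    posixpath.join(remote_filepath, file_name),  # e.g. /source/report.xml
    os.path.join(local_filepath, file_name),
)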
I'm in the process of automating our MariaDB backup jobs in a Python program. However, for some reason the --target-directory parameter, although passed correctly and validated by printing, is ignored when the actual command is run through os.system. Code below:
import os
import datetime, time
import mysql.connector as mariadb

unix_socket = "/var/lib/mysql/mysql.sock"
currentdate = datetime.datetime.now()
bkp_path = time.strftime("/%Y/%m-%d/%H%M")
ro_status = ("show global variables like 'read_only%'")
dblist = ("show databases")

db = mariadb.connect(user='pytest', password='pytest', unix_socket=unix_socket)
cur = db.cursor()
cur.execute(ro_status)
result = cur.fetchall()
for r in result:
    if "OFF" in r:
        cur.execute(dblist)
        dbs = cur.fetchall()
        for d in dbs:
            dbstr = ''.join(d)
            bkp_path = "/backups/" + dbstr + time.strftime("/%Y/%m-%d/%H%M/")
            bkp_cmd = "sudo mariabackup --backup --databases='" + dbstr + "' --target-directory=" + bkp_path + " --user pytest --password=pytest --no-lock"
            try:
                os.stat(bkp_path)
            except:
                os.makedirs(bkp_path)
            try:
                # print(bkp_cmd)
                os.system(bkp_cmd)
            except:
                print("Problem running backup on this host")
    else:
        print(h + " is read only and will not be backed up")
Example of the printed command:
sudo mariabackup --backup --databases="testdbBA" --target-directory=/backups/testdbBA/2019/03-06/1659/ --user pytest --password=pytest --no-lock
Even when the printed command is run separately, it attempts to write to my local home directory and not to the specified target dir.
It's --target-dir, not --target-directory.
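For reference, the corrected command; building it as an argument list with subprocess.run (a sketch using the question's values) also sidesteps shell-quoting surprises:

import subprocess

subprocess.run([
    "sudo", "mariabackup", "--backup",
    "--databases=" + dbstr,
    "--target-dir=" + bkp_path,  # --target-dir, not --target-directory
    "--user", "pytest", "--password=pytest", "--no-lock",
], check=True)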
Thanks to MFisherKDX.
I have a database that I want to back up with a Python script.
I tried to base my code on the code in this discussion, which uses the subprocess module and pg_dump. My problem now is that I have to manually type in the password to get the backup file. I read about using a .pgpass file, but I want to see if it is possible to do this within the subprocess module.
My code follows below:
from subprocess import Popen, PIPE
from pathlib import Path, PureWindowsPath

def backup():
    version = 11
    postgresDir = Path("C:/Program Files/PostgreSQL/{}/bin/".format(version))
    directory = PureWindowsPath(postgresDir)
    filename = 'myBackUp2'  # output filename here
    saveDir = Path("D:/Desktop/{}.tar".format(filename))  # output directory here
    file = PureWindowsPath(saveDir)
    host = 'localhost'
    user = 'postgres'
    port = '5434'
    dbname = 'database_name'  # database name here
    proc = Popen(['pg_dump', '-h', host, '-U', user, '-W', '-p', port,
                  '-F', 't', '-f', str(file), '-d', dbname],
                 cwd=directory, shell=True, stdin=PIPE)
    proc.wait()

backup()
The code above works and the backup is created if I type in the password. I tried to replace proc.wait() with the code below to remove the need to type the password manually:
return proc.communicate('{}\n'.format(database_password))
But I would receive this error:
TypeError: a bytes-like object is required, not 'str'
Is this possible to do within subprocess? If so, how?
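The TypeError itself happens because the pipe was opened in binary mode (the default), so communicate() expects bytes, not str; encoding the string clears that error:

# Fixes the TypeError only: communicate() needs bytes when the pipe
# is opened in binary mode (the default).
return proc.communicate('{}\n'.format(database_password).encode())

Note, though, that pg_dump normally reads the password from the terminal rather than from stdin, so even with the encoding fixed this approach may not supply the password; the options below avoid the prompt altogether.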
Use a password file.
On Microsoft Windows the file is named %APPDATA%\postgresql\pgpass.conf (where %APPDATA% refers to the Application Data subdirectory in the user's profile).
Then use the -w or --no-password command-line option (instead of -W):
-w
--no-password
Never issue a password prompt. If the server requires password authentication and a password is not available by other means such as a .pgpass file, the connection attempt will fail. This option can be useful in batch jobs and scripts where no user is present to enter a password.
The easiest option is to use the PGPASSWORD environment variable.
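Building on the question's Popen call, it might look like this (a sketch; the variables are as defined in backup(), and database_password is assumed to hold the password):

import os
from subprocess import Popen

env = os.environ.copy()
env['PGPASSWORD'] = database_password  # assumed to hold the password
proc = Popen(['pg_dump', '-h', host, '-U', user, '-w', '-p', port,
              '-F', 't', '-f', str(file), '-d', dbname],
             cwd=directory, env=env)  # -w: never prompt for a password
proc.wait()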
There are two classes:
The first class builds the DSN parameters and tries to connect with them. If it cannot connect, it hands off to the second class.
The second class creates the database and restores all tables from a file. You need to adjust these strings so that your database dump file opens correctly:
__folder_name = Path(__file__).parent.parent
__folder_name_data = os.path.join(__folder_name, 'data')
__file_to_open = os.path.join(__folder_name_data, 'bd.backup')
import os
import textwrap
import logging
from pathlib import Path
from subprocess import Popen, PIPE

import psycopg2

log = logging.getLogger(__name__)


class DataBaseAPI:
    __slots__ = ('__dsn', 'cur')

    def __init__(self):
        self.__dsn = self.__dsn_string()
        self.cur = self.__connection()

    @staticmethod
    def __dsn_string() -> dict:
        print(f'INPUT name of DataBase')
        name = input()
        print(f'INPUT password of DataBase')
        password = input()
        print(f'INPUT user_name of DataBase or press ENTER if user_name="postgres"')
        user_name = input()
        if len(user_name) == 0:
            user_name = 'postgres'
        print(f'INPUT host_name of DataBase or press ENTER if host_name="localhost"')
        host_name = input()
        if len(host_name) == 0:
            host_name = 'localhost'
        return {'dbname': name, 'user': user_name, 'password': password, 'host': host_name}

    def __connection(self):
        try:
            conn = psycopg2.connect(dbname=self.__dsn['dbname'], user=self.__dsn['user'],
                                    host=self.__dsn['host'], password=self.__dsn['password'], port=5432)
        except psycopg2.OperationalError:
            print(textwrap.fill(f'There is no existing DataBase. Creating new DataBase', 80,
                                subsequent_indent='    '))
            DataBaseCreator(self.__dsn)
            conn = psycopg2.connect(dbname=self.__dsn['dbname'], user=self.__dsn['user'],
                                    host=self.__dsn['host'], password=self.__dsn['password'], port=5432)
        finally:
            conn.autocommit = True
            cur = conn.cursor()
            print(f'DataBase connection complete')
            return cur


class DataBaseCreator:
    def __init__(self, dsn):
        self.__dsn = dsn
        self.__check_conf_file()
        self.__create_data_base()
        self.__restore_data_base()

    def __check_conf_file(self):
        __app_data = os.environ.copy()["APPDATA"]
        __postgres_path = Path(f'{__app_data}\\postgresql')
        __pgpass_file = Path(f'{__postgres_path}\\pgpass.conf')
        parameters = f'{self.__dsn["host"]}:{5432}:{self.__dsn["dbname"]}:' \
                     f'{self.__dsn["user"]}:{self.__dsn["password"]}\n'
        if not os.path.isdir(__postgres_path):
            os.makedirs(__postgres_path)
        if os.path.isfile(__pgpass_file):
            log.debug(f'File "pgpass.conf" already exists')
            with open(__pgpass_file, 'r+') as f:
                content = f.readlines()
                if parameters not in content:
                    # server:port:database:username:password
                    f.write(parameters)
                else:
                    log.info(f' {parameters} already in "pgpass.conf" file')
        else:
            log.debug(f'File "pgpass.conf" does not exist')
            with open(__pgpass_file, 'x') as f:
                # server:port:database:username:password
                f.write(parameters)

    def __create_data_base(self):
        try:
            __conn = psycopg2.connect(dbname='postgres', user=self.__dsn['user'],
                                      host=self.__dsn['host'], password=self.__dsn['password'], port=5432)
        except Exception as _:
            log.exception(f'{_}')
        else:
            __conn.autocommit = True
            __cur = __conn.cursor()
            __query = f'CREATE DATABASE "{self.__dsn["dbname"]}"'
            __cur.execute(__query)
            log.info(f'{__query}')

    def __restore_data_base(self):
        __col = [x for x in self.__dsn.values()]
        __folder_name = Path(__file__).parent.parent
        __folder_name_data = os.path.join(__folder_name, 'data')
        __file_to_open = os.path.join(__folder_name_data, 'bd.backup')
        __cmd = f'pg_restore --host={__col[3]} --dbname={__col[0]} --username={__col[1]} ' \
                f'--verbose --no-password ' \
                f'{__file_to_open}'
        try:
            __proc = Popen(__cmd, stdout=PIPE, stderr=PIPE)
        except FileNotFoundError:
            log.info(f'FileNotFoundError: [WinError 2] The system cannot find the file specified')
            log.info(textwrap.fill(f'You need to SET the Windows $PATH to use "pg_restore" in cmd', 80,
                                   subsequent_indent='    '))
        else:
            __stderr = __proc.communicate()[1].decode('utf-8', errors="ignore").strip()
            log.debug(textwrap.fill(f'{__stderr}', 80))
One more option is to use the dbname parameter, which can carry the password inside a connection URI (note the @ between password and host):

'pg_dump --dbname=postgresql://{}:{}@{}:{}/{}'.format(user, password, host, port, database_name)
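Used from Python it might look like this (a sketch; every value, including the output file name, is a placeholder):

import subprocess

subprocess.run([
    'pg_dump',
    '--dbname=postgresql://{}:{}@{}:{}/{}'.format(
        user, password, host, port, database_name),
    '-F', 't', '-f', 'backup.tar',  # hypothetical output file
], check=True)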
I am trying to extract images from one location and copy them into a per-ID folder structure based on a MySQL table. The script creates the folders according to the MySQL table ID, but no images are copied.
Here is my script:
import datetime
import sys
import os
import MySQLdb

db_driver = "mysql"
host = "localhost"
db = "customer_1"
user = "root"
passwd = "H0t"

db = MySQLdb.connect(host='localhost', user='root', passwd='H0t', db='customer_1')
cursor = db.cursor()

cam_name = sys.argv[1]
topLevel = "/var/wwwdev/cam_images/%s_anpr_vega" % cam_name

sql = """select id,image FROM %s_anpr_vega WHERE image IS NOT NULL LIMIT 20000""" % cam_name
print sql
cursor.execute(sql)
retval = cursor.fetchall()

for values in retval:
    (id, image) = values
    id_string = "%s" % id
    path_string = ""
    for i in range(len(id_string)):
        path_string = "%s/%s" % (path_string, id_string[i])
    imdir = "%s%s" % (topLevel, path_string)
    try:
        os.makedirs(imdir)
    except:
        pass
    image_path = "%s.jpg" % imdir
    print image_path
    fp = open(image_path, "w")
    fp.write(image)
    fp.close()
    sql = """UPDATE %s_anpr_vega SET image=NULL WHERE id=%s""" % (cam_name, id)
    print sql
    cursor.execute(sql)

sql = """OPTIMIZE TABLE %s_anpr_vega """ % cam_name
print sql
#cursor.execute(sql)
Are you sure this isn't just creating image files named ".jpg" in each folder?
It looks to me like you forgot the filename portion of your image_path. You should see that in the prints coming out of that code.
If you're using Windows Explorer or the like you might not notice the dot-named JPEG files.
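If that is what is happening, a sketch of the fix (using the question's variables) would be to give the file an explicit name inside the ID directory and open it in binary mode, since image blobs are bytes:

import os

image_path = os.path.join(imdir, "%s.jpg" % id)  # e.g. .../1/2/3/123.jpg
fp = open(image_path, "wb")  # binary mode for image data
fp.write(image)
fp.close()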