Python Tar Backup and Purge

Python Tar Backup and Purge

While working on a related project that uses Python to write to cloud object storage, including the logic around purging, I jotted down some quick-and-dirty code here as a reference to build on. Normally I would recommend the excellent restic program, but in this case I am forced to use native APIs.

This serves as a reminder to myself: it is only a very elementary daily tar-plus-gzip backup with subsequent purging of old backups. Just a test.

#!/usr/bin/python
#
#: Script Name  : tarBak.py
#: Author       : Riaan Rossouw
#: Date Created : March 13, 2019
#: Date Updated : March 13, 2019
#: Description  : Python Script to manage tar backups
#: Examples     : tarBak.py -t target -f 'folder1,folder2' -c -g GOLD
#:              : tarBak.py --target <backup folder> --folders <folders> --create

import optparse, os, glob, sys, re, datetime
import tarfile
import socket

# Script version; combined with the script's file name below to form the
# version string handed to optparse.
__version__ = '0.9.1'
optdesc = 'This script is used to manage tar backups of files'

# Command-line interface. The parser is built and run at module level, so
# sys.argv is parsed as soon as this file is executed or imported.
parser = optparse.OptionParser(description=optdesc,version=os.path.basename(__file__) + ' ' + __version__)
# Let option names occupy up to 50 columns before the help text starts.
parser.formatter.max_help_position = 50
# -t/-f/-g use action='append', so their values arrive as lists (or None when
# the flag is absent); the rest of the script reads only the first element.
parser.add_option('-t', '--target', help='Specify Target', dest='target', action='append')
# A single -f value carries a comma-separated list of folders.
parser.add_option('-f', '--folders', help='Specify Folders', dest='folders', action='append')
parser.add_option('-c', '--create', help='Create a new backup', dest='create', action='store_true',default=False)
parser.add_option('-p', '--purge', help='Purge older backups per policy', dest='purge', action='store_true',default=False)
parser.add_option('-g', '--group', help='Policy group', dest='group', action='append')
# Stored as 'listall' rather than 'list' to avoid an attribute named after the builtin.
parser.add_option('-l', '--list', help='List backups', dest='listall', action='store_true',default=False)
opts, args = parser.parse_args()

def make_tarfile(output_filename, source_dirs):
  """Write a gzip-compressed tar archive containing every path in source_dirs.

  Each source is stored in the archive under its final path component, so
  '/home/user' (or '/home/user/') is archived as 'user/...'.

  Args:
    output_filename: Path of the archive to create; opened in "w:gz" mode.
    source_dirs: Iterable of file or directory paths to add.
  """
  with tarfile.open(output_filename, "w:gz") as tar:
    for source_dir in source_dirs:
      # normpath strips a trailing separator first: basename('/a/b/') is '',
      # which would archive the directory's contents at the archive root
      # instead of under 'b/'.
      arcname = os.path.basename(os.path.normpath(source_dir))
      tar.add(source_dir, arcname=arcname)

def getBackupType(backup_time_created):
  """Classify a backup timestamp as YEARLY, MONTHLY, WEEKLY or DAILY.

  The most significant tier wins: 1 January is YEARLY, any other 1st of the
  month is MONTHLY, any other Sunday is WEEKLY, everything else is DAILY.

  Args:
    backup_time_created: A value whose str() looks like
      'YYYY-MM-DD HH:MM:SS' optionally followed by '.<fraction>'.

  Returns:
    A tuple (backup_type, date_string) where date_string is formatted as
    '%a %d %B %Y', e.g. ('DAILY', 'Wed 13 March 2019').

  Raises:
    ValueError: If the timestamp does not match the expected format.
  """
  # partition (unlike a two-way unpack of split) tolerates a timestamp that
  # has no fractional-seconds part.
  stamp = str(backup_time_created).partition('.')[0]
  d = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').date()
  dt = d.strftime('%a %d %B %Y')

  # Test the rarest tier first; otherwise the broader "day == 1" and
  # "Sunday" checks swallow the yearly and monthly cases.
  if d.month == 1 and d.day == 1:
    backup_t = 'YEARLY'
  elif d.day == 1:
    backup_t = 'MONTHLY'
  elif d.weekday() == 6:  # date.weekday(): Monday is 0, Sunday is 6
    backup_t = 'WEEKLY'
  else:
    backup_t = 'DAILY'

  return (backup_t, dt)

def listBackups(target):
  """Print the DAILY backup files found in target, newest first.

  Only files whose name contains 'DAILY' are listed; other tiers and manual
  snapshots are not shown.

  Args:
    target: Directory holding the backup archives, with or without a
      trailing path separator.
  """
  print ("Listing backup files..")

  # os.path.join keeps the pattern inside the directory even when target
  # lacks a trailing separator.
  paths = glob.glob(os.path.join(target, "*DAILY*"))
  paths.sort(key=os.path.getmtime, reverse=True)

  for path in paths:
    print (path)
  
def purgeBackups(target, group):
  """Delete scheduled backups that exceed their tier's retention count.

  Archives matching '*.tgz*' in target are walked newest-first by
  modification time. For each tier the newest N are kept (DAILY 7, WEEKLY 4,
  MONTHLY 12, YEARLY 5) and older ones are removed, printing one DELETE line
  per removed file. Files whose name does not carry a
  'files-<TIER>-<YYYYMMDD>' marker are treated as manual snapshots and are
  never purged.

  Args:
    target: Directory holding the backup archives, with or without a
      trailing path separator.
    group: Policy group; accepted for interface compatibility but not used
      by the retention logic yet.

  Raises:
    OSError: If a file selected for deletion cannot be removed.
  """
  print ("Purging backup files..this needs testing and more logic for SILVER and BRONZE policies?")

  # tier -> (label shown in the DELETE message, number of newest files kept)
  retention = {
    "DAILY": ("DAILY", 7),
    "WEEKLY": ("Sun", 4),
    "MONTHLY": ("01", 12),
    "YEARLY": ("01", 5),
  }
  # Match the tier by the date stamp that follows it rather than a fixed
  # year, so archives from any year are classified.
  tier_re = re.compile(r'files-(DAILY|WEEKLY|MONTHLY|YEARLY)-\d{8}')

  paths = glob.glob(os.path.join(target, "*.tgz*"))
  paths.sort(key=os.path.getmtime, reverse=True)
  seen = dict.fromkeys(retention, 0)

  for path in paths:
    # Inspect only the file name so a tier word in a directory name cannot
    # trigger a match.
    match = tier_re.search(os.path.basename(path))
    if match is None:
      continue  # manual snapshot: never purged

    tier = match.group(1)
    label, keep = retention[tier]
    seen[tier] += 1
    if seen[tier] > keep:
      comment = "{} this one is more than {} deleting".format(label, keep)
      os.remove(path)
      print ('DELETE: {:25}: {:25}'.format(path, comment) )

def createBackup(target, folders, group):
  """Create a tar.gz backup of folders in target if the policy group allows it.

  The backup tier (DAILY/WEEKLY/MONTHLY/YEARLY) is derived from the current
  local time. GOLD snapshots on every tier, SILVER on WEEKLY and above,
  BRONZE on MONTHLY and above. A group that is None or not one of these
  three has no policy applied, so the backup is always taken.

  Args:
    target: Directory in which to write the archive.
    folders: List of file or directory paths to archive.
    group: Policy group name ('GOLD', 'SILVER', 'BRONZE') or None.

  Returns:
    The path of the archive that was written, or None when the policy says
    no snapshot is needed at this time.
  """
  print ("creating backup of " + str(folders))

  # Take one clock reading so the tier and the file name stamp cannot
  # disagree when the run straddles midnight.
  now = datetime.datetime.now()
  t,ds = getBackupType(now.strftime("%Y-%m-%d %H:%M:%S.0"))

  # Tiers on which each policy group takes a snapshot.
  allowed_tiers = {
    "GOLD": ("DAILY", "WEEKLY", "MONTHLY", "YEARLY"),
    "SILVER": ("WEEKLY", "MONTHLY", "YEARLY"),
    "BRONZE": ("MONTHLY", "YEARLY"),
  }
  if group in allowed_tiers and t not in allowed_tiers[group]:
    print ("SNAPSHOT NOT NEEDED AT THIS TIME PER THE POLICY")
    return None

  # The 'CST' suffix is a literal label, not derived from the system timezone.
  file_name = socket.gethostname() + '-files-' + t + "-" + now.strftime("%Y%m%d-%H%MCST") + '.tgz'
  # os.path.join avoids a doubled separator when target ends with '/'.
  BackupName = os.path.join(target, file_name)

  make_tarfile(BackupName, folders)
  return BackupName

def main():
  """Dispatch to list, create and/or purge according to the parsed options.

  --list takes precedence and excludes the other actions. Otherwise --create
  runs first (when given), followed by --purge (when given). The policy
  group is optional; None is passed on when -g is absent.

  Exits with status 1 when the target, or the folders required by --create,
  are missing.
  """
  if not opts.target:
    print ("\n\n must specify target folder")
    # Non-zero status so cron and shell callers can detect the usage error.
    sys.exit(1)
  target = opts.target[0]

  # -g is optional; indexing opts.group directly would raise TypeError when
  # the flag is omitted.
  group = opts.group[0] if opts.group else None

  if opts.listall:
    listBackups(target)
    return

  if opts.create:
    if not opts.folders:
      print ("\n\n must specify folders")
      sys.exit(1)
    folders = opts.folders[0].split(',')
    createBackup(target, folders, group)

  if opts.purge:
    purgeBackups(target, group)

if __name__ == '__main__':
  main()

And running it like this:

$ python tarBak.py -t /tmp/MyBackups/ -f '/home/rrosso,/var/log/syslog' -g GOLD -c
creating backup of ['/home/rrosso', '/var/log/syslog']

$ python tarBak.py -t /tmp/MyBackups/ -p -g GOLD
Purging backup files..this needs testing and more logic for SILVER and BRONZE policies?
DELETE: /tmp/MyBackups/xubuntu32-files-DAILY-20190313-1420CST.tgz: DAILY this one is more than 7 deleting
$ crontab -l | tail -1
0 5 * * * cd /Src/tarBak/ ; python tarBak.py -t /MyBackups/ -f '/home/rrosso,/var/spool/syslog' -c -p -g GOLD 2>&1