Package CedarBackup2 :: Package extend :: Module split
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.extend.split

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2007,2010,2013 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python 2 (>= 2.7) 
 29  # Project  : Official Cedar Backup Extensions 
 30  # Purpose  : Provides an extension to split up large files in staging directories. 
 31  # 
 32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 33   
 34  ######################################################################## 
 35  # Module documentation 
 36  ######################################################################## 
 37   
 38  """ 
 39  Provides an extension to split up large files in staging directories. 
 40   
 41  When this extension is executed, it will look through the configured Cedar 
 42  Backup staging directory for files exceeding a specified size limit, and split 
 43  them down into smaller files using the 'split' utility.  Any directory which 
 44  has already been split (as indicated by the C{cback.split} file) will be 
 45  ignored. 
 46   
 47  This extension requires a new configuration section <split> and is intended 
 48  to be run immediately after the standard stage action or immediately before the 
 49  standard store action.  Aside from its own configuration, it requires the 
 50  options and staging configuration sections in the standard Cedar Backup 
 51  configuration file. 
 52   
 53  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 54  """ 
 55   
 56  ######################################################################## 
 57  # Imported modules 
 58  ######################################################################## 
 59   
 60  # System modules 
 61  import os 
 62  import re 
 63  import logging 
 64   
 65  # Cedar Backup modules 
 66  from CedarBackup2.util import resolveCommand, executeCommand, changeOwnership 
 67  from CedarBackup2.xmlutil import createInputDom, addContainerNode 
 68  from CedarBackup2.xmlutil import readFirstChild 
 69  from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles 
 70  from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode 
 71   
 72   
 73  ######################################################################## 
 74  # Module-wide constants and variables 
 75  ######################################################################## 
 76   
 77  logger = logging.getLogger("CedarBackup2.log.extend.split") 
 78   
 79  SPLIT_COMMAND = [ "split", ] 
 80  SPLIT_INDICATOR = "cback.split" 
########################################################################
# SplitConfig class definition
########################################################################

class SplitConfig(object):

   """
   Value object holding the settings used when splitting staging directories.

   Two attributes are exposed as validated properties:

      - C{sizeLimit}: either C{None} or a C{ByteQuantity}
      - C{splitSize}: either C{None} or a C{ByteQuantity}

   Assigning anything else to either property raises C{ValueError}.

   @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
   """

   def __init__(self, sizeLimit=None, splitSize=None):
      """
      Constructor for the C{SplitConfig} class.

      Both values are routed through the property setters, so they are
      validated exactly as later assignments would be.

      @param sizeLimit: Size limit of the files, as a C{ByteQuantity} (or C{None})
      @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity} (or C{None})

      @raise ValueError: If one of the values is invalid.
      """
      self._sizeLimit = self._splitSize = None
      self.sizeLimit = sizeLimit
      self.splitSize = splitSize

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      fields = (self.sizeLimit, self.splitSize)
      return "SplitConfig(%s, %s)" % fields

   def __str__(self):
      """
      Informal string representation for class instance (same as the official one).
      """
      return repr(self)

   def __cmp__(self, other):
      """
      Three-way comparison against another configuration object.

      The size limit is compared first; the split size is only consulted
      when the size limits are equal.  Any object compares greater than C{None}.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self.sizeLimit != other.sizeLimit:
         return -1 if self.sizeLimit < other.sizeLimit else 1
      if self.splitSize != other.splitSize:
         return -1 if self.splitSize < other.splitSize else 1
      return 0

   def _setSizeLimit(self, value):
      """
      Property target used to set the size limit.
      If not C{None}, the value must be a C{ByteQuantity} object.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._sizeLimit = value

   def _getSizeLimit(self):
      """
      Property target used to get the size limit.
      """
      return self._sizeLimit

   def _setSplitSize(self, value):
      """
      Property target used to set the split size.
      If not C{None}, the value must be a C{ByteQuantity} object.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._splitSize = value

   def _getSplitSize(self):
      """
      Property target used to get the split size.
      """
      return self._splitSize

   sizeLimit = property(fget=_getSizeLimit, fset=_setSizeLimit, fdel=None, doc="Size limit, as a ByteQuantity")
   splitSize = property(fget=_getSplitSize, fset=_setSplitSize, fdel=None, doc="Split size, as a ByteQuantity")
189
########################################################################
# LocalConfig class definition
########################################################################

class LocalConfig(object):

   """
   Class representing this extension's configuration document.

   This is not a general-purpose configuration object like the main Cedar
   Backup configuration object.  Instead, it just knows how to parse and emit
   split-specific configuration values.  Third parties who need to read and
   write configuration related to this extension should access it through the
   constructor, C{validate} and C{addConfig} methods.

   @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
   """

   def __init__(self, xmlData=None, xmlPath=None, validate=True):
      """
      Initializes a configuration object.

      If you initialize the object without passing either C{xmlData} or
      C{xmlPath} then configuration will be empty and will be invalid until it
      is filled in properly.

      No reference to the original XML data or original path is saved off by
      this class.  Once the data has been parsed (successfully or not) this
      original information is discarded.

      Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
      method will be called (with its default arguments) against configuration
      after successfully parsing any passed-in XML.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      XML document if lower-level validations fail.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid configuration from disk.

      @param xmlData: XML data representing configuration.
      @type xmlData: String data.

      @param xmlPath: Path to an XML file on disk.
      @type xmlPath: Absolute path to a file on disk.

      @param validate: Validate the document after parsing it.
      @type validate: Boolean true/false.

      @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
      @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
      @raise ValueError: If the parsed configuration document is not valid.
      @raise IOError: If the file at C{xmlPath} cannot be opened or read.
      """
      self._split = None
      self.split = None
      if xmlData is not None and xmlPath is not None:
         raise ValueError("Use either xmlData or xmlPath, but not both.")
      if xmlPath is not None:
         # Read through a context manager so the file handle is closed
         # deterministically, even if the read itself fails.
         with open(xmlPath) as xmlFile:
            xmlData = xmlFile.read()
      if xmlData is not None:
         self._parseXmlData(xmlData)
         if validate:
            self.validate()

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "LocalConfig(%s)" % (self.split, )

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Definition of equals operator for this class.

      Two objects are ordered by their C{split} attribute; any object
      compares greater than C{None}.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self.split != other.split:
         if self.split < other.split:
            return -1
         else:
            return 1
      return 0

   def _setSplit(self, value):
      """
      Property target used to set the split configuration value.
      If not C{None}, the value must be a C{SplitConfig} object.
      @raise ValueError: If the value is not a C{SplitConfig}
      """
      if value is None:
         self._split = None
      else:
         if not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
         self._split = value

   def _getSplit(self):
      """
      Property target used to get the split configuration value.
      """
      return self._split

   split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

   def validate(self):
      """
      Validates configuration represented by the object.

      Split configuration must be filled in.  Within that, both the size limit
      and split size must be filled in.

      @raise ValueError: If one of the validations fails.
      """
      if self.split is None:
         raise ValueError("Split section is required.")
      if self.split.sizeLimit is None:
         raise ValueError("Size limit must be set.")
      if self.split.splitSize is None:
         raise ValueError("Split size must be set.")

   def addConfig(self, xmlDom, parentNode):
      """
      Adds a <split> configuration section as the next child of a parent.

      Third parties should use this function to write configuration related to
      this extension.  Nothing is written if no split configuration is set.

      We add the following fields to the document::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param xmlDom: DOM tree as from C{impl.createDocument()}.
      @param parentNode: Parent that the section should be appended to.
      """
      if self.split is not None:
         sectionNode = addContainerNode(xmlDom, parentNode, "split")
         addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
         addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

   def _parseXmlData(self, xmlData):
      """
      Internal method to parse an XML string into the object.

      This method parses the XML document into a DOM tree and then calls a
      static method to parse the split configuration section.

      @param xmlData: XML data to be parsed
      @type xmlData: String data

      @raise ValueError: If the XML cannot be successfully parsed.
      """
      # Only the parent node is needed; the DOM object itself is not used here.
      (_, parentNode) = createInputDom(xmlData)
      self._split = LocalConfig._parseSplit(parentNode)

   @staticmethod
   def _parseSplit(parent):
      """
      Parses a split configuration section.

      We read the following individual fields::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param parent: Parent node to search beneath.

      @return: C{SplitConfig} object or C{None} if the section does not exist.
      @raise ValueError: If some filled-in value is invalid.
      """
      split = None
      section = readFirstChild(parent, "split")
      if section is not None:
         split = SplitConfig()
         split.sizeLimit = readByteQuantity(section, "size_limit")
         split.splitSize = readByteQuantity(section, "split_size")
      return split
382
########################################################################
# Public functions
########################################################################

###########################
# executeAction() function
###########################

def executeAction(configPath, options, config):
   """
   Executes the split backup action.

   The extension's own configuration is loaded (and validated) from
   C{configPath}.  Every daily directory returned by C{findDailyDirs} for the
   staging target directory is then split and afterwards marked with the
   C{cback.split} indicator file.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options (not used by this action).
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise IOError: If there are I/O problems reading or writing files
   """
   logger.debug("Executing split extended action.")
   if not (config.options is not None and config.stage is not None):
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   splitSettings = LocalConfig(xmlPath=configPath)
   for stagingDir in findDailyDirs(config.stage.targetDir, SPLIT_INDICATOR):
      _splitDailyDir(stagingDir,
                     local.split.sizeLimit if False else splitSettings.split.sizeLimit,
                     splitSettings.split.splitSize,
                     config.options.backupUser,
                     config.options.backupGroup)
      writeIndicatorFile(stagingDir, SPLIT_INDICATOR, config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the split extended action successfully.")
418
##############################
# _splitDailyDir() function
##############################

def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
   """
   Splits large files in a daily staging directory.

   The candidate list comes from C{getBackupFiles}, which ignores indicator
   files (i.e. C{"cback.store"}, C{"cback.stage"}, etc.).  Each remaining file
   whose on-disk size is greater than C{sizeLimit} is handed to C{_splitFile}
   with C{removeSource=True}, so the oversized original is removed once it
   has been split.

   @param dailyDir: Daily directory to split
   @param sizeLimit: Size limit that a file's size in bytes is compared against
   @param splitSize: Split size, passed through to C{_splitFile}
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by

   @raise ValueError: If the daily staging directory does not exist.
   """
   logger.debug("Begin splitting contents of [%s].", dailyDir)
   for candidate in getBackupFiles(dailyDir):  # ignores indicator files
      onDiskSize = float(os.stat(candidate).st_size)
      if not (onDiskSize > sizeLimit):
         continue  # small enough, leave it alone
      _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
   logger.debug("Completed splitting contents of [%s].", dailyDir)
448
########################
# _splitFile() function
########################

def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
   """
   Splits the source file into chunks of the indicated size.

   The C{split} command is run from the source file's directory, so the
   chunks are written next to the source.  Chunks are named
   C{<filename>_NNNNN}, where C{NNNNN} is a zero-based, five-digit numeric
   suffix.

   The split files will be owned by the indicated backup user and group.  If
   C{removeSource} is C{True}, then the source file will be removed after it is
   successfully split.  The process working directory is always restored
   before returning, whether or not the split succeeds.

   @param sourcePath: Absolute path of the source file to split
   @param splitSize: Size of each chunk, as an object with a C{bytes} attribute (i.e. a C{ByteQuantity})
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by
   @param removeSource: Indicates whether to remove the source file

   @raise ValueError: If the source path does not exist.
   @raise IOError: If there is a problem accessing, splitting or removing the source file.
   """
   cwd = os.getcwd()
   try:
      if not os.path.exists(sourcePath):
         raise ValueError("Source path [%s] does not exist." % sourcePath)
      dirname = os.path.dirname(sourcePath)
      filename = os.path.basename(sourcePath)
      prefix = "%s_" % filename
      chunkBytes = int(splitSize.bytes)
      os.chdir(dirname) # need to operate from directory that we want files written to
      command = resolveCommand(SPLIT_COMMAND)
      args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix, ]
      (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
      if result != 0:
         raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
      if not output:
         # Nothing to parse; report it the same way as unparseable output
         # rather than failing with an IndexError below.
         raise IOError("Unable to parse output from split command.")
      # The prefix comes from the file name and may contain regex
      # metacharacters ("+", "(", "[", ...), so it must be escaped.
      pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
      # The last line of verbose output names the last chunk that was created.
      match = pattern.search(output[-1])
      if match is None:
         raise IOError("Unable to parse output from split command.")
      lastIndex = int(match.group(3).strip())
      # Suffixes are zero-based, so the chunk numbered lastIndex exists too
      # and must be checked and re-owned like all the others.
      for index in range(0, lastIndex + 1):
         path = "%s%05d" % (prefix, index)
         if not os.path.exists(path):
            raise IOError("After call to split, expected file [%s] does not exist." % path)
         changeOwnership(path, backupUser, backupGroup)
      if removeSource:
         if os.path.exists(sourcePath):
            try:
               os.remove(sourcePath)
            except (IOError, OSError):
               # Only translate genuine filesystem failures; things like
               # KeyboardInterrupt must propagate untouched.
               raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
            logger.debug("Completed removing old file [%s].", sourcePath)
   finally:
      os.chdir(cwd)
503