1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 """
40 Store-type extension that writes data to Amazon S3.
41
42 This extension requires a new configuration section <amazons3> and is intended
43 to be run immediately after the standard stage action, replacing the standard
44 store action. Aside from its own configuration, it requires the options and
45 staging configuration sections in the standard Cedar Backup configuration file.
46 Since it is intended to replace the store action, it does not rely on any store
47 configuration.
48
49 The underlying functionality relies on the U{AWS CLI interface
50 <http://aws.amazon.com/documentation/cli/>}. Before you use this extension,
51 you need to set up your Amazon S3 account and configure the AWS CLI connection
52 per Amazon's documentation. The extension assumes that the backup is being
53 executed as root, and switches over to the configured backup user to
54 communicate with AWS. So, make sure you configure AWS CLI as the backup user
55 and not root.
56
57 You can optionally configure Cedar Backup to encrypt data before sending it
58 to S3. To do that, provide a complete command line using the C{${input}} and
59 C{${output}} variables to represent the original input file and the encrypted
60 output file. This command will be executed as the backup user.
61
62 For instance, you can use something like this with GPG::
63
64 /usr/bin/gpg -c --no-use-agent --batch --yes --passphrase-file /home/backup/.passphrase -o ${output} ${input}
65
66 The GPG mechanism depends on a strong passphrase for security. One way to
67 generate a strong passphrase is using your system random number generator, i.e.::
68
69 dd if=/dev/urandom count=20 bs=1 | xxd -ps
70
71 (See U{StackExchange <http://security.stackexchange.com/questions/14867/gpg-encryption-security>}
72 for more details about that advice.) If you decide to use encryption, make sure
73 you save off the passphrase in a safe place, so you can get at your backup data
74 later if you need to. And obviously, make sure to set permissions on the
75 passphrase file so it can only be read by the backup user.
76
77 This extension was written for and tested on Linux. It will throw an exception
78 if run on Windows.
79
80 @author: Kenneth J. Pronovici <pronovic@ieee.org>
81 """
82
83
84
85
86
87
88 import sys
89 import os
90 import logging
91 import tempfile
92 import datetime
93 import json
94 import shutil
95
96
97 from CedarBackup2.filesystem import FilesystemList
98 from CedarBackup2.util import resolveCommand, executeCommand, isRunningAsRoot, changeOwnership
99 from CedarBackup2.xmlutil import createInputDom, addContainerNode, addBooleanNode, addStringNode
100 from CedarBackup2.xmlutil import readFirstChild, readString, readBoolean
101 from CedarBackup2.actions.util import writeIndicatorFile
102 from CedarBackup2.actions.constants import DIR_TIME_FORMAT, STAGE_INDICATOR
103
104
105
106
107
108
# Module-level logger, namespaced under the Cedar Backup logging hierarchy.
logger = logging.getLogger("CedarBackup2.log.extend.amazons3")

# Base commands, resolved at runtime via resolveCommand().  The "su" command
# is used to run the AWS CLI as the configured backup user rather than root.
SU_COMMAND = ["su"]
AWS_COMMAND = ["aws"]

# Indicator file written into a staging directory once it has been stored in S3.
STORE_INDICATOR = "cback.amazons3"
122
123 """
124 Class representing Amazon S3 configuration.
125
126 Amazon S3 configuration is used for storing backup data in Amazon's S3 cloud
127 storage using the C{s3cmd} tool.
128
129 The following restrictions exist on data in this class:
130
131 - The s3Bucket value must be a non-empty string
132 - The encryptCommand value, if set, must be a non-empty string
133
134 @sort: __init__, __repr__, __str__, __cmp__, warnMidnite, s3Bucket
135 """
136
def __init__(self, warnMidnite=None, s3Bucket=None, encryptCommand=None):
    """
    Constructor for the C{AmazonS3Config} class.

    @param warnMidnite: Whether to generate warnings for crossing midnite.
    @param s3Bucket: Name of the Amazon S3 bucket in which to store the data
    @param encryptCommand: Command used to encrypt backup data before upload to S3

    @raise ValueError: If one of the values is invalid.
    """
    # Initialize the backing fields first, then route each argument through
    # its property setter so values are validated/normalized on the way in.
    self._warnMidnite = None
    self._s3Bucket = None
    self._encryptCommand = None
    self.warnMidnite = warnMidnite
    self.s3Bucket = s3Bucket
    self.encryptCommand = encryptCommand
153
155 """
156 Official string representation for class instance.
157 """
158 return "AmazonS3Config(%s, %s, %s)" % (self.warnMidnite, self.s3Bucket, self.encryptCommand)
159
161 """
162 Informal string representation for class instance.
163 """
164 return self.__repr__()
165
190
192 """
193 Property target used to set the midnite warning flag.
194 No validations, but we normalize the value to C{True} or C{False}.
195 """
196 if value:
197 self._warnMidnite = True
198 else:
199 self._warnMidnite = False
200
202 """
203 Property target used to get the midnite warning flag.
204 """
205 return self._warnMidnite
206
208 """
209 Property target used to set the S3 bucket.
210 """
211 if value is not None:
212 if len(value) < 1:
213 raise ValueError("S3 bucket must be non-empty string.")
214 self._s3Bucket = value
215
217 """
218 Property target used to get the S3 bucket.
219 """
220 return self._s3Bucket
221
223 """
224 Property target used to set the encrypt command.
225 """
226 if value is not None:
227 if len(value) < 1:
228 raise ValueError("Encrypt command must be non-empty string.")
229 self._encryptCommand = value
230
232 """
233 Property target used to get the encrypt command.
234 """
235 return self._encryptCommand
236
# Public properties wired to the private getter/setter pairs above.
warnMidnite = property(_getWarnMidnite, _setWarnMidnite, None, "Whether to generate warnings for crossing midnite.")
s3Bucket = property(_getS3Bucket, _setS3Bucket, None, doc="Amazon S3 Bucket in which to store data")
encryptCommand = property(_getEncryptCommand, _setEncryptCommand, None, doc="Command used to encrypt data before upload to S3")
240
247
248 """
249 Class representing this extension's configuration document.
250
251 This is not a general-purpose configuration object like the main Cedar
252 Backup configuration object. Instead, it just knows how to parse and emit
253 amazons3-specific configuration values. Third parties who need to read and
254 write configuration related to this extension should access it through the
255 constructor, C{validate} and C{addConfig} methods.
256
257 @note: Lists within this class are "unordered" for equality comparisons.
258
259 @sort: __init__, __repr__, __str__, __cmp__, amazons3, validate, addConfig
260 """
261
def __init__(self, xmlData=None, xmlPath=None, validate=True):
    """
    Initializes a configuration object.

    If you initialize the object without passing either C{xmlData} or
    C{xmlPath} then configuration will be empty and will be invalid until it
    is filled in properly.

    No reference to the original XML data or original path is saved off by
    this class.  Once the data has been parsed (successfully or not) this
    original information is discarded.

    Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
    method will be called (with its default arguments) against configuration
    after successfully parsing any passed-in XML.  Keep in mind that even if
    C{validate} is C{False}, it might not be possible to parse the passed-in
    XML document if lower-level validations fail.

    @note: It is strongly suggested that the C{validate} option always be set
    to C{True} (the default) unless there is a specific need to read in
    invalid configuration from disk.

    @param xmlData: XML data representing configuration.
    @type xmlData: String data.

    @param xmlPath: Path to an XML file on disk.
    @type xmlPath: Absolute path to a file on disk.

    @param validate: Validate the document after parsing it.
    @type validate: Boolean true/false.

    @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
    @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
    @raise ValueError: If the parsed configuration document is not valid.
    """
    self._amazons3 = None
    self.amazons3 = None
    if xmlData is not None and xmlPath is not None:
        raise ValueError("Use either xmlData or xmlPath, but not both.")
    if xmlData is not None:
        self._parseXmlData(xmlData)
        if validate:
            self.validate()
    elif xmlPath is not None:
        # Use a context manager so the file handle is closed deterministically
        # (the original left the handle open until garbage collection).
        with open(xmlPath) as fp:
            xmlData = fp.read()
        self._parseXmlData(xmlData)
        if validate:
            self.validate()
310
312 """
313 Official string representation for class instance.
314 """
315 return "LocalConfig(%s)" % (self.amazons3)
316
318 """
319 Informal string representation for class instance.
320 """
321 return self.__repr__()
322
324 """
325 Definition of equals operator for this class.
326 Lists within this class are "unordered" for equality comparisons.
327 @param other: Other object to compare to.
328 @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
329 """
330 if other is None:
331 return 1
332 if self.amazons3 != other.amazons3:
333 if self.amazons3 < other.amazons3:
334 return -1
335 else:
336 return 1
337 return 0
338
340 """
341 Property target used to set the amazons3 configuration value.
342 If not C{None}, the value must be a C{AmazonS3Config} object.
343 @raise ValueError: If the value is not a C{AmazonS3Config}
344 """
345 if value is None:
346 self._amazons3 = None
347 else:
348 if not isinstance(value, AmazonS3Config):
349 raise ValueError("Value must be a C{AmazonS3Config} object.")
350 self._amazons3 = value
351
353 """
354 Property target used to get the amazons3 configuration value.
355 """
356 return self._amazons3
357
# Public property wired to the private getter/setter pair above.
amazons3 = property(_getAmazonS3, _setAmazonS3, None, "AmazonS3 configuration in terms of a C{AmazonS3Config} object.")
359
361 """
362 Validates configuration represented by the object.
363
364 AmazonS3 configuration must be filled in. Within that, the s3Bucket target must be filled in
365
366 @raise ValueError: If one of the validations fails.
367 """
368 if self.amazons3 is None:
369 raise ValueError("AmazonS3 section is required.")
370 if self.amazons3.s3Bucket is None:
371 raise ValueError("AmazonS3 s3Bucket must be set.")
372
374 """
375 Adds an <amazons3> configuration section as the next child of a parent.
376
377 Third parties should use this function to write configuration related to
378 this extension.
379
380 We add the following fields to the document::
381
382 warnMidnite //cb_config/amazons3/warn_midnite
383 s3Bucket //cb_config/amazons3/s3_bucket
384 encryptCommand //cb_config/amazons3/encrypt
385
386 @param xmlDom: DOM tree as from C{impl.createDocument()}.
387 @param parentNode: Parent that the section should be appended to.
388 """
389 if self.amazons3 is not None:
390 sectionNode = addContainerNode(xmlDom, parentNode, "amazons3")
391 addBooleanNode(xmlDom, sectionNode, "warn_midnite", self.amazons3.warnMidnite)
392 addStringNode(xmlDom, sectionNode, "s3_bucket", self.amazons3.s3Bucket)
393 addStringNode(xmlDom, sectionNode, "encrypt", self.amazons3.encryptCommand)
394
396 """
397 Internal method to parse an XML string into the object.
398
399 This method parses the XML document into a DOM tree (C{xmlDom}) and then
400 calls a static method to parse the amazons3 configuration section.
401
402 @param xmlData: XML data to be parsed
403 @type xmlData: String data
404
405 @raise ValueError: If the XML cannot be successfully parsed.
406 """
407 (xmlDom, parentNode) = createInputDom(xmlData)
408 self._amazons3 = LocalConfig._parseAmazonS3(parentNode)
409
@staticmethod
def _parseAmazonS3(parent):
    """
    Parses an amazons3 configuration section.

    We read the following individual fields::

       warnMidnite    //cb_config/amazons3/warn_midnite
       s3Bucket       //cb_config/amazons3/s3_bucket
       encryptCommand //cb_config/amazons3/encrypt

    @param parent: Parent node to search beneath.

    @return: C{AmazonS3Config} object or C{None} if the section does not exist.
    @raise ValueError: If some filled-in value is invalid.
    """
    # Guard clause: no <amazons3> section means no configuration at all.
    section = readFirstChild(parent, "amazons3")
    if section is None:
        return None
    amazons3 = AmazonS3Config()
    amazons3.warnMidnite = readBoolean(section, "warn_midnite")
    amazons3.s3Bucket = readString(section, "s3_bucket")
    amazons3.encryptCommand = readString(section, "encrypt")
    return amazons3
434
435
436
437
438
439
440
441
442
443
def executeAction(configPath, options, config):
    """
    Executes the amazons3 backup action.

    @param configPath: Path to configuration file on disk.
    @type configPath: String representing a path on disk.

    @param options: Program command-line options.
    @type options: Options object.

    @param config: Program configuration.
    @type config: Config object.

    @raise ValueError: Under many generic error conditions
    @raise IOError: If there are I/O problems reading or writing files
    """
    logger.debug("Executing amazons3 extended action.")
    # The action switches to the backup user via su, so it must start as root.
    if not isRunningAsRoot():
        logger.error("Error: the amazons3 extended action must be run as root.")
        raise ValueError("The amazons3 extended action must be run as root.")
    # The su-based implementation has no equivalent on Windows.
    if sys.platform == "win32":
        logger.error("Error: the amazons3 extended action is not supported on Windows.")
        raise ValueError("The amazons3 extended action is not supported on Windows.")
    if config.options is None or config.stage is None:
        raise ValueError("Cedar Backup configuration is not properly filled in.")
    # Parse extension config, locate staging data, upload it, then mark it stored.
    local = LocalConfig(xmlPath=configPath)
    stagingDirs = _findCorrectDailyDir(options, config, local)
    _writeToAmazonS3(config, local, stagingDirs)
    _writeStoreIndicator(config, stagingDirs)
    logger.info("Executed the amazons3 extended action successfully.")
474
485 """
486 Finds the correct daily staging directory to be written to Amazon S3.
487
488 This is substantially similar to the same function in store.py. The
489 main difference is that it doesn't rely on store configuration at all.
490
491 @param options: Options object.
492 @param config: Config object.
493 @param local: Local config object.
494
495 @return: Correct staging dir, as a dict mapping directory to date suffix.
496 @raise IOError: If the staging directory cannot be found.
497 """
498 oneDay = datetime.timedelta(days=1)
499 today = datetime.date.today()
500 yesterday = today - oneDay
501 tomorrow = today + oneDay
502 todayDate = today.strftime(DIR_TIME_FORMAT)
503 yesterdayDate = yesterday.strftime(DIR_TIME_FORMAT)
504 tomorrowDate = tomorrow.strftime(DIR_TIME_FORMAT)
505 todayPath = os.path.join(config.stage.targetDir, todayDate)
506 yesterdayPath = os.path.join(config.stage.targetDir, yesterdayDate)
507 tomorrowPath = os.path.join(config.stage.targetDir, tomorrowDate)
508 todayStageInd = os.path.join(todayPath, STAGE_INDICATOR)
509 yesterdayStageInd = os.path.join(yesterdayPath, STAGE_INDICATOR)
510 tomorrowStageInd = os.path.join(tomorrowPath, STAGE_INDICATOR)
511 todayStoreInd = os.path.join(todayPath, STORE_INDICATOR)
512 yesterdayStoreInd = os.path.join(yesterdayPath, STORE_INDICATOR)
513 tomorrowStoreInd = os.path.join(tomorrowPath, STORE_INDICATOR)
514 if options.full:
515 if os.path.isdir(todayPath) and os.path.exists(todayStageInd):
516 logger.info("Amazon S3 process will use current day's staging directory [%s]" % todayPath)
517 return { todayPath:todayDate }
518 raise IOError("Unable to find staging directory to process (only tried today due to full option).")
519 else:
520 if os.path.isdir(todayPath) and os.path.exists(todayStageInd) and not os.path.exists(todayStoreInd):
521 logger.info("Amazon S3 process will use current day's staging directory [%s]" % todayPath)
522 return { todayPath:todayDate }
523 elif os.path.isdir(yesterdayPath) and os.path.exists(yesterdayStageInd) and not os.path.exists(yesterdayStoreInd):
524 logger.info("Amazon S3 process will use previous day's staging directory [%s]" % yesterdayPath)
525 if local.amazons3.warnMidnite:
526 logger.warn("Warning: Amazon S3 process crossed midnite boundary to find data.")
527 return { yesterdayPath:yesterdayDate }
528 elif os.path.isdir(tomorrowPath) and os.path.exists(tomorrowStageInd) and not os.path.exists(tomorrowStoreInd):
529 logger.info("Amazon S3 process will use next day's staging directory [%s]" % tomorrowPath)
530 if local.amazons3.warnMidnite:
531 logger.warn("Warning: Amazon S3 process crossed midnite boundary to find data.")
532 return { tomorrowPath:tomorrowDate }
533 raise IOError("Unable to find unused staging directory to process (tried today, yesterday, tomorrow).")
534
541 """
542 Writes the indicated staging directories to an Amazon S3 bucket.
543
544 Each of the staging directories listed in C{stagingDirs} will be written to
545 the configured Amazon S3 bucket from local configuration. The directories
546 will be placed into the image at the root by date, so staging directory
547 C{/opt/stage/2005/02/10} will be placed into the S3 bucket at C{/2005/02/10}.
548 If an encrypt commmand is provided, the files will be encrypted first.
549
550 @param config: Config object.
551 @param local: Local config object.
552 @param stagingDirs: Dictionary mapping directory path to date suffix.
553
554 @raise ValueError: Under many generic error conditions
555 @raise IOError: If there is a problem writing to Amazon S3
556 """
557 for stagingDir in stagingDirs.keys():
558 logger.debug("Storing stage directory to Amazon S3 [%s]." % stagingDir)
559 dateSuffix = stagingDirs[stagingDir]
560 s3BucketUrl = "s3://%s/%s" % (local.amazons3.s3Bucket, dateSuffix)
561 logger.debug("S3 bucket URL is [%s]" % s3BucketUrl)
562 _clearExistingBackup(config, s3BucketUrl)
563 if local.amazons3.encryptCommand is None:
564 logger.debug("Encryption is disabled; files will be uploaded in cleartext.")
565 _uploadStagingDir(config, stagingDir, s3BucketUrl)
566 _verifyUpload(config, stagingDir, s3BucketUrl)
567 else:
568 logger.debug("Encryption is enabled; files will be uploaded after being encrypted.")
569 encryptedDir = tempfile.mkdtemp(dir=config.options.workingDir)
570 changeOwnership(encryptedDir, config.options.backupUser, config.options.backupGroup)
571 try:
572 _encryptStagingDir(config, local, stagingDir, encryptedDir)
573 _uploadStagingDir(config, encryptedDir, s3BucketUrl)
574 _verifyUpload(config, encryptedDir, s3BucketUrl)
575 finally:
576 if os.path.exists(encryptedDir):
577 shutil.rmtree(encryptedDir)
578
594
601 """
602 Clear any existing backup files for an S3 bucket URL.
603 @param config: Config object.
604 @param s3BucketUrl: S3 bucket URL associated with the staging directory
605 """
606 suCommand = resolveCommand(SU_COMMAND)
607 awsCommand = resolveCommand(AWS_COMMAND)
608 actualCommand = "%s s3 rm --recursive %s/" % (awsCommand[0], s3BucketUrl)
609 result = executeCommand(suCommand, [config.options.backupUser, "-c", actualCommand])[0]
610 if result != 0:
611 raise IOError("Error [%d] calling AWS CLI to clear existing backup for [%s]." % (result, s3BucketUrl))
612 logger.debug("Completed clearing any existing backup in S3 for [%s]" % s3BucketUrl)
613
620 """
621 Upload the contents of a staging directory out to the Amazon S3 cloud.
622 @param config: Config object.
623 @param stagingDir: Staging directory to upload
624 @param s3BucketUrl: S3 bucket URL associated with the staging directory
625 """
626 suCommand = resolveCommand(SU_COMMAND)
627 awsCommand = resolveCommand(AWS_COMMAND)
628 actualCommand = "%s s3 cp --recursive %s/ %s/" % (awsCommand[0], stagingDir, s3BucketUrl)
629 result = executeCommand(suCommand, [config.options.backupUser, "-c", actualCommand])[0]
630 if result != 0:
631 raise IOError("Error [%d] calling AWS CLI to upload staging directory to [%s]." % (result, s3BucketUrl))
632 logger.debug("Completed uploading staging dir [%s] to [%s]" % (stagingDir, s3BucketUrl))
633
634
635
636
637
638
def _verifyUpload(config, stagingDir, s3BucketUrl):
    """
    Verify that a staging directory was properly uploaded to the Amazon S3 cloud.

    The bucket contents under the date prefix are listed via
    C{aws s3api list-objects}, and every local regular file in the staging
    directory must appear in that listing with a matching size.

    @param config: Config object.
    @param stagingDir: Staging directory to verify
    @param s3BucketUrl: S3 bucket URL associated with the staging directory

    @raise IOError: If verification fails or the AWS CLI command fails.
    """
    (bucket, prefix) = s3BucketUrl.replace("s3://", "").split("/", 1)
    suCommand = resolveCommand(SU_COMMAND)
    awsCommand = resolveCommand(AWS_COMMAND)
    query = "Contents[].{Key: Key, Size: Size}"
    actualCommand = "%s s3api list-objects --bucket %s --prefix %s --query '%s'" % (awsCommand[0], bucket, prefix, query)
    (result, data) = executeCommand(suCommand, [config.options.backupUser, "-c", actualCommand], returnOutput=True)
    if result != 0:
        raise IOError("Error [%d] calling AWS CLI verify upload to [%s]." % (result, s3BucketUrl))
    # Build a map of remote relative path -> size from the JSON listing.
    contents = { }
    for entry in json.loads("".join(data)):
        key = entry["Key"].replace(prefix, "")
        size = long(entry["Size"])
        contents[key] = size
    files = FilesystemList()
    files.addDirContents(stagingDir)
    for entry in files:
        if os.path.isfile(entry):
            key = entry.replace(stagingDir, "")
            size = long(os.stat(entry).st_size)
            if key not in contents:
                raise IOError("File was apparently not uploaded: [%s]" % entry)
            if size != contents[key]:
                raise IOError("File size differs [%s], expected %s bytes but got %s bytes" % (entry, size, contents[key]))
    logger.debug("Completed verifying upload from [%s] to [%s].", stagingDir, s3BucketUrl)
671
678 """
679 Encrypt a staging directory, creating a new directory in the process.
680 @param config: Config object.
681 @param stagingDir: Staging directory to use as source
682 @param encryptedDir: Target directory into which encrypted files should be written
683 """
684 suCommand = resolveCommand(SU_COMMAND)
685 files = FilesystemList()
686 files.addDirContents(stagingDir)
687 for cleartext in files:
688 if os.path.isfile(cleartext):
689 encrypted = "%s%s" % (encryptedDir, cleartext.replace(stagingDir, ""))
690 if long(os.stat(cleartext).st_size) == 0:
691 open(encrypted, 'a').close()
692 else:
693 actualCommand = local.amazons3.encryptCommand.replace("${input}", cleartext).replace("${output}", encrypted)
694 subdir = os.path.dirname(encrypted)
695 if not os.path.isdir(subdir):
696 os.makedirs(subdir)
697 changeOwnership(subdir, config.options.backupUser, config.options.backupGroup)
698 result = executeCommand(suCommand, [config.options.backupUser, "-c", actualCommand])[0]
699 if result != 0:
700 raise IOError("Error [%d] encrypting [%s]." % (result, cleartext))
701 logger.debug("Completed encrypting staging directory [%s] into [%s]" % (stagingDir, encryptedDir))
702