AWS Glacier Backups

A short list of commands to help zip or image data and content into mid-size chunks that can be stored in S3 buckets. Since we are storing backups on AWS S3 Glacier, which must be restored before access, we want to keep the number of files reasonable. AWS charges for each API request, so there is an incentive to store bigger files. The maximum size of a single-request upload is 4 GB, so let's aim for file sizes in the range of 100 MB - 3 GB.

Common aws commands:

# Determine total size of a directory
# -s: print only the summary total (without it, du lists every subdirectory)
# -h: human-readable sizes
du -sh /path/to/folder1

# Create encrypted zip (-e: prompt for a password, -r: recurse into the folder).
# zip requires the output archive name BEFORE the input paths; with only one
# argument the original command fails with "nothing to do".
zip -er /path/to/archive.zip /path/to/folder

# List current mounts and their sizes (-h: human-readable).
# The flag is "-h" with no space after the dash; "df - h" treats "-" and "h"
# as file operands and fails.
df -h /Volumes/tmp

# Bundle folders into a gzip-compressed tarball
tar --create --gzip --file /Volumes/tmp/docs.tar.gz folder1 folder2

# Encrypt using openssl; the passphrase is read from the key file storage.enc_key
# NOTE(review): modern OpenSSL (1.1.1+) warns that the default key derivation
# here is weak; consider adding "-salt -pbkdf2 -iter 100000" — but only if the
# SAME options are added to every matching decrypt command in these notes,
# otherwise previously encrypted archives will no longer decrypt.
openssl aes-256-cbc -pass file:storage.enc_key -in docs.tar.gz -out docs.tar.gz.enc

# Create encrypted tars by piping commands (no intermediate plaintext file)
tar -cz folder1 folder2 | openssl aes-256-cbc -pass file:storage.enc_key -out /Volumes/tmp/docs.tar.gz.enc
# Decrypt and extract in one pipeline. tar's -C switches into the target
# directory before extracting, and that directory must already exist.
# NOTE(review): the archive was created with "folder1" as a top-level entry,
# so extracting with -C .../folder1 likely produces folder1/folder1 —
# confirm whether the intended target is /Volumes/tmp instead.
openssl aes-256-cbc -d -pass file:storage.enc_key -in /Volumes/tmp/docs.tar.gz.enc | tar -xzC /Volumes/tmp/folder1

# Install & configure awscli (via Homebrew)
# Credentials and config are stored in the .aws dir
# "aws configure" prompts interactively; the indented lines below are the
# prompts it shows (transcript, not shell commands) with sample answers.
brew install awscli
aws configure
    AWS Access Key ID:      <access-id>
    AWS Secret Access Key:  <secret-access>
    Default region name:    us-west-2
    Default output format:  json

# Show every Glacier vault owned by this account.
# "--account-id -" tells the CLI to use the account tied to the
# configured credentials.
aws glacier list-vaults \
    --account-id -

# Show jobs for a Glacier vault; poll this after any "initiate-job"
# to check whether the job has completed.
aws glacier list-jobs --account-id - --vault-name docs

# Prepare the list of archives in a Glacier vault, making the list
# available for download. The job takes 3-5 hours to complete.
# Fix: the option is "--vault-name"; "--vault" is not a valid aws glacier flag.
aws glacier initiate-job \
    --account-id - \
    --vault-name docs \
    --job-parameters '{ "Type": "inventory-retrieval" }'

# Fetch the list of archives using the job-id from the previous job.
# get-job-output requires a final positional argument: the local file to
# write the output to (the original line ended in a dangling "\" and had
# no output file, so the command was incomplete).
# archiveList.json will contain the details of the archives in the vault.
aws glacier get-job-output \
    --account-id - \
    --vault-name docs \
    --job-id "j6ig7qCeJ4Ortc-D83EgHsNxm3RriaAkyEFma37EU07Wxc_5BQfwllggqsgH_JfLusxIV" \
    archiveList.json

# Upload a small archive to Glacier in a single request
aws glacier upload-archive --account-id - --vault-name docs --body docs-2008.tar.gz

# Upload archive to Glacier (large files) using script (multi-part upload).
# Set vaultName and the names of uploadFiles in the script before running it.
# NOTE(review): the script's filename/entry point is not shown here — confirm
# in the data-backr repo.
cd ~/Code/github/data-backr

# Delete archive using the archive-id obtained from inventory-retrieval.
# WARNING: this permanently removes the archive from the vault.
aws glacier delete-archive \
    --account-id - \
    --vault-name docs \
    --archive-id "3y4YOvvTJ2ssaRnzq2PnWSfKwmI6ociaStj71gjVZjQ2EOnRxfuJ7C5qeE9pS3qq74vumcp80"

# Ask Glacier to stage an archive for download (job takes 3-5 hours).
# Job parameters (Type / ArchiveId / Description) are read from a local
# JSON file instead of being passed inline.
aws glacier initiate-job \
    --vault-name docs \
    --account-id - \
    --job-parameters file://archive-retrieval.json

# Download the staged archive using the job-id from the previous job.
# get-job-output requires a final positional argument: the local file to
# write the output to (the original line ended in a dangling "\" and had
# no output file, so the command was incomplete). The .enc name matches
# the decrypt step that follows.
aws glacier get-job-output \
    --account-id - \
    --vault-name docs \
    --job-id "xGvIJyQPC9weheMNwIf4s2z8Zct1lYGvjzdxz84VwhD3qadoOkMGo-FYaLJ5psLKhhcFDjC1n" \
    docs.tar.gz.enc

# Decrypt using openssl (-d), with the same key file used for encryption.
# NOTE(review): if key-derivation options (e.g. "-pbkdf2") are ever added to
# the encrypt commands, the identical options must be added here too.
openssl aes-256-cbc -d -pass file:storage.enc_key -in docs.tar.gz.enc -out docs.tar.gz

# Unpack the tarball (compression is detected automatically on extract)
tar --extract --file docs.tar.gz
# Example contents of archive-retrieval.json (referenced by the
# archive-retrieval initiate-job command above):
{
  "Type": "archive-retrieval",
  "ArchiveId": "AveGlBWdJIDk8-THelSpu8FFo34KUmg8pVOQFvMxEQzM8MXMC6A4V7XcQZdP9m33ZpCGhsrMXnAgn05ng2xDvHHGFSRUj",
  "Description": "Retrieve SQL dump for audit team"
}