Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 141 additions & 57 deletions glacierupload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,62 +5,146 @@
# sudo dnf install parallel
# sudo pip install awscli

byteSize=4194304

# count the number of files that begin with "part"
fileCount=$(ls -1 | grep "^part" | wc -l)
echo "Total parts to upload: " $fileCount

# get the list of part files to upload. Edit this if you chose a different prefix in the split command
files=$(ls | grep "^part")

# initiate multipart upload connection to glacier
init=$(aws glacier initiate-multipart-upload --account-id - --part-size $byteSize --vault-name media1 --archive-description "November 2015 Pictures and Videos")

echo "---------------------------------------"
# xargs trims off the quotes
# jq pulls out the json element titled uploadId
uploadId=$(echo $init | jq '.uploadId' | xargs)

# create temp file to store commands
touch commands.txt

# create upload commands to be run in parallel and store in commands.txt
i=0
for f in $files
do
byteStart=$((i*byteSize))
byteEnd=$((i*byteSize+byteSize-1))
echo aws glacier upload-multipart-part --body $f --range "'"'bytes '"$byteStart"'-'"$byteEnd"'/*'"'" --account-id - --vault-name media1 --upload-id $uploadId >> commands.txt
i=$(($i+1))

done

# run upload commands in parallel
# --load 100% option only gives new jobs out if the core is than 100% active
# -a commands.txt runs every line of that file in parallel, in potentially random order
# --notice supresses citation output to the console
# --bar provides a command line progress bar
parallel --load 100% -a commands.txt --no-notice --bar

echo "List Active Multipart Uploads:"
echo "Verify that a connection is open:"
aws glacier list-multipart-uploads --account-id - --vault-name media1

# end the multipart upload
aws glacier abort-multipart-upload --account-id - --vault-name media1 --upload-id $uploadId

# list open multipart connections
echo "------------------------------"
echo "List Active Multipart Uploads:"
echo "Verify that the connection is closed:"
aws glacier list-multipart-uploads --account-id - --vault-name media1

#echo "-------------"
#echo "Contents of commands.txt"
#cat commands.txt
echo "--------------"
echo "Deleting temporary commands.txt file"
rm commands.txt
jqCMD=$( which jq )
parallelCMD=$( which parallel )
pipCMD=$( which pip )
awsCMD=$( which aws )

if [ -z "$jqCMD" ] || [ -z "$pipCMD" ] || [ -z "$parallelCMD" ] || [ -z "$awsCMD" ]; then
echo; echo;
echo Missing prerequisites:
echo jq [ $jqCMD ]
echo pip [ $pipCMD ]
echo aws [ $awsCMD ]
echo paralell [ $paralellCMD ]
echo; echo;

exit 1
fi

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then
echo; echo;
echo "Syntax: $0 [ Source File Name ] [ Profile Name ] [ Vault Name ] [ Archive Description ]"
echo; echo;

exit 1
fi

srcFile=$1
profileName=$2
vaultName=$3
archiveDesc=$4

if [ ! -s "$srcFile" ]; then
echo; echo;
echo "Source file does not exist [ $srcFile ]"
echo; echo;

exit 1
fi

partPrefix="glacierPart_"

# chunkMb needs to be 1, 2, 4, 8, 32, 64 - It's not recommended to go above 100mb chunks for uploads
chunkMb=32
byteSize=$( bc <<<"1024*1024*$chunkMb" )

srcFileSize=$( stat -c%s $srcFile )

if [[ $srcFileSize < $byteSize ]]; then
echo; echo
echo "file [ $srcFile ] size [ $srcFileSize ] < [ $byteSize ] "
echo; echo

aws glacier upload-archive --profile $profileName --account-id - --vault-name $vaultName --body $srcFile
else


# make tmp directory to split file
ramDisk="/dev/shm"
tmpPrefix="glacierUpload_"

previousTmpDirs=$(ls $ramDisk/$tmpPrefix* 2> /dev/null )
if [ ! -z "$previousTmpDirs" ]; then
echo ; echo

echo Previous temp directories exist
echo $previousTmpDirs
echo remove contents before continuing. This Only happens when the previous execution fails!

echo ; echo

exit
fi

tmpDir="$ramDisk/$tmpPrefix$( date +"%s" )"
echo "Creating [ $tmpDir ]"
mkdir -p $tmpDir

# split the file
split --bytes=$byteSize --verbose $srcFile $tmpDir/$partPrefix

# count the number of files that begin with "$partPrefix"
fileCount=$(ls -1 $tmpDir | grep "^$partPrefix" | wc -l)
echo "Total parts to upload: " $fileCount

cd $tmpDir

# get the list of part files to upload. Edit this if you chose a different prefix in the split command
files=$(ls | grep "^$partPrefix")
echo "Files: ${#files[@]}"

# initiate multipart upload connection to glacier
init=$(aws glacier initiate-multipart-upload --profile $profileName --account-id - --part-size $byteSize --vault-name $vaultName --archive-description "$archiveDesc")

echo "---------------------------------------"
# xargs trims off the quotes
# jq pulls out the json element titled uploadId
uploadId=$(echo $init | jq '.uploadId' | xargs)

echo uploadID [ $uploadId ]

# create temp file to store commands
touch commands.txt

# create upload commands to be run in parallel and store in commands.txt
i=0
for f in $files
do
byteStart=$((i*byteSize))
byteEnd=$((i*byteSize+byteSize-1))
echo aws glacier upload-multipart-part --body $f --range "'"'bytes '"$byteStart"'-'"$byteEnd"'/*'"'" --profile $profileName --account-id - --vault-name $vaultName --upload-id $uploadId >> commands.txt
i=$(($i+1))

done

# run upload commands in parallel
# --load 100% option only gives new jobs out if the core is than 100% active
# -a commands.txt runs every line of that file in parallel, in potentially random order
# --notice supresses citation output to the console
# --bar provides a command line progress bar
parallel --load 100% -a commands.txt --eta --progress

echo "List Active Multipart Uploads:"
echo "Verify that a connection is open:"
aws glacier list-multipart-uploads --profile $profileName --account-id - --vault-name $vaultName

# end the multipart upload
aws glacier abort-multipart-upload --profile $profileName --account-id - --vault-name $vaultName --upload-id $uploadId

# list open multipart connections
echo "------------------------------"
echo "List Active Multipart Uploads:"
echo "Verify that the connection is closed:"
aws glacier list-multipart-uploads --profile $profileName --account-id - --vault-name $vaultName

#echo "-------------"
#echo "Contents of commands.txt"
cat commands.txt
echo "--------------"
echo "Deleting temporary commands.txt file"
rm commands.txt

rm -rf $tmpDir

fi
13 changes: 13 additions & 0 deletions listGlacierJobs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

if [ ! "$1" ] || [ ! "$2" ]; then
echo; echo;
echo $0 [ Profile name ] [ Vault Name ]
echo; echo;
exit 1
fi

profile=$1
vaultName=$2

aws glacier list-jobs --profile $profile --account-id - --vault-name $vaultName