Setting AWS S3 bucket permissions with R

aws
s3
r
paws
Author

al

Published

May 24, 2024

Modified

May 27, 2024

Here we will set up an S3 bucket with a policy that allows the public to read from the bucket (but not from a specific directory), and allows a particular AWS account's root user, as well as a defined IAM user, to write to the bucket.

Although we are stoked on the s3fs package for working with S3 buckets, we will lean on the paws package more than strictly necessary here, just to learn how it all works. s3fs seems like the way to go for common moves, but paws is the “mom” providing the structure and guidance behind that package.

Code
library(paws)
library(here)
here() starts at /Users/airvine/Projects/repo/new_graphiti
Code
library(jsonlite)
library(stringr)
library(s3fs)

List our current buckets

Code
s3 <- paws::s3()
s3$list_buckets()
$Buckets
$Buckets[[1]]
$Buckets[[1]]$Name
[1] "23cog"

$Buckets[[1]]$CreationDate
[1] "2023-03-17 00:07:12 GMT"



$Owner
$Owner$DisplayName
[1] "al"

$Owner$ID
[1] "f5267b02e31758d1efea79b4eaef5d0423efb3e6a54ab869dc860bcc68ebae2d"

Create Bucket

Let’s create a bucket with the same name as this repository (swapping underscores for hyphens, since S3 bucket names can’t contain underscores).

Code
my_bucket_name <- basename(here::here()) |> 
  stringr::str_replace_all("_", "-") 

bucket_path <- s3fs::s3_path(my_bucket_name)
Code
s3$create_bucket(Bucket = my_bucket_name,
  CreateBucketConfiguration = list(
    LocationConstraint = Sys.getenv("AWS_DEFAULT_REGION")
  ))
$Location
[1] "http://new-graphiti.s3.amazonaws.com/"

Add the policy to the bucket.

  1. Important - First we need to allow “new public policies” to be added to the bucket. We do this by deleting the public access block, a security feature that prevents public access to the bucket. We remove it here so we can add our own policy. Took a while to catch this.
Code
s3$delete_public_access_block(
  Bucket = my_bucket_name
)
list()
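To confirm the block is really gone we can ask for it - S3 errors when no configuration exists, so a hedged check (assuming the paws get_public_access_block operation) looks like this:

Code
# confirm the public access block is gone - S3 errors if none exists
tryCatch(
  s3$get_public_access_block(Bucket = my_bucket_name),
  error = function(e) message("no public access block: ", conditionMessage(e))
)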
  2. Write the policy for the bucket. Here is a function to build a generic policy for an S3 bucket that allows the public to read from the bucket, but not from a specific directory, and allows a particular aws_account_id to write to the bucket. It also allows you to provide presigned URLs so we can grant temporary access to private objects without changing the overall bucket or object permissions.

The key thing here is that if we want a user to keep access to a directory or file after we Deny access to everyone else, we need to add a condition to the policy that exempts that user (paste0("arn:aws:iam::", aws_account_id, ":user/", user_access_permission)) from the Deny.

Code
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-walkthroughs-managing-access-example1.html
#https://chatgpt.com/share/16106509-a34d-4f69-bf95-cd5eb2649707
aws_policy_write <- function(bucket_name, 
                             bucket_dir_private, 
                             aws_account_id, 
                             user_access_permission, 
                             write_json = FALSE, 
                             dir_output = "policy", 
                             file_name = "policy.json") {
  policy <- list(
    Version = "2012-10-17",
    Statement = list(
      list(
        Effect = "Allow",
        Principal = "*",
        Action = "s3:GetObject",
        Resource = paste0("arn:aws:s3:::", bucket_name, "/*")
      ),
      list(
        Effect = "Deny",
        Principal = "*",
        Action = "s3:GetObject",
        Resource = paste0("arn:aws:s3:::", bucket_name, "/", bucket_dir_private, "/*"),
        # IMPORTANT - Denies everyone from getting objects from the private directory except for user_access_permission
        Condition = list(
          StringNotEquals = list(
            "aws:PrincipalArn" = paste0("arn:aws:iam::", aws_account_id, ":user/", user_access_permission)
          )
        )
      ),
      list(
        Effect = "Allow",
        Principal = list(AWS = paste0("arn:aws:iam::", aws_account_id, ":root")),
        Action = c("s3:DeleteObject", "s3:PutObject"),
        Resource = paste0("arn:aws:s3:::", bucket_name, "/*")
      )
      #going to leave this here for now
      # list(
      #   Effect = "Allow",
      #   Principal = list(AWS = paste0("arn:aws:iam::", aws_account_id, ":user/", user_access_permission)),
      #   Action = c("s3:GetBucketLocation", "s3:ListBucket"),
      #   Resource = paste0("arn:aws:s3:::", bucket_name)
      # ),
      # list(
      #   Effect = "Allow",
      #   Principal = list(AWS = paste0("arn:aws:iam::", aws_account_id, ":user/", user_access_permission)),
      #   Action = "s3:GetObject",
      #   Resource = paste0("arn:aws:s3:::", bucket_name, "/*")
      # )
    )
  )
  
  json_policy <- jsonlite::toJSON(policy, pretty = TRUE, auto_unbox = TRUE)
  
  if (write_json) {
    dir.create(dir_output, showWarnings = FALSE)
    output_path <- file.path(dir_output, file_name)
    write(json_policy, file = output_path)
    message("Policy written to ", output_path)
  } else {
    return(json_policy)
  }
}

Now we can write the policy to the bucket.

Code
# run the function to build the json policy statement
my_policy <- aws_policy_write(bucket_name = my_bucket_name, 
                         bucket_dir_private = "private",
                         aws_account_id = Sys.getenv("AWS_ACCOUNT_ID"),
                         user_access_permission = "airvine",
                         write_json = FALSE
                         )

# push the policy to the bucket
s3$put_bucket_policy(
  Bucket = my_bucket_name,
  Policy = my_policy,
  ExpectedBucketOwner = Sys.getenv("AWS_ACCOUNT_ID")
)
list()
Code
# this is cool - Check the policy was added correctly.
s3$get_bucket_policy(my_bucket_name)
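The policy comes back as a single JSON string, so a quick sketch to make it readable with jsonlite (already loaded above):

Code
# the policy is returned as a raw JSON string - prettify it for reading
s3$get_bucket_policy(Bucket = my_bucket_name)$Policy |>
  jsonlite::prettify()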

Add some files to the bucket

First we add a photo to the main bucket. Going to use s3fs for this since I haven’t actually copied just one file yet… We are using the here package to build the path to the image due to rendering complexities.

Code
s3fs::s3_file_copy(
  path = paste0(here::here(), "/posts/", params$post_dir_name, "/image.jpg"),
  bucket_path
)
[1] "s3://new-graphiti/image.jpg"

Then we add one to the private directory.

Code
s3fs::s3_dir_create(
  path = paste0(bucket_path, "/private")
)
[1] "s3://new-graphiti/private"
Code
s3fs::s3_file_copy(
  path = paste0(here::here(), "/posts/", params$post_dir_name, "/image.jpg"),
  paste0(bucket_path, "/private")
)
[1] "s3://new-graphiti/private/image.jpg"

Access the bucket

Let’s see if we can add the images to this post.

Create the paths to the images.

Code
# s3fs::s3_dir_info(bucket_path, recurse = TRUE)
image_path <- paste0("https://", my_bucket_name, ".s3.amazonaws.com/image.jpg")
image_path_private <- paste0("https://", my_bucket_name, ".s3.amazonaws.com/private/image.jpg")

Access the public image.

Code
knitr::include_graphics(image_path)

Good to go.

And now access the private image.

Code
knitr::include_graphics(image_path_private)

💣 Jackpot! The image is in the “private” directory so we can’t access it from the post without permission.

Provide temporary access to an object

Because we granted ourselves access to the private directory and our IAM roles have the correct privileges, we can create a presigned URL to provide temporary access to the private image. We will set the maximum expiry of 7 days (604,800 seconds). That means that at 2024-06-03 14:48 the URL will stop working and the image below will no longer render in this post!

Code
knitr::include_graphics(
  s3fs::s3_file_url(
    s3_dir_ls(paste0(bucket_path, "/private")),
    604800,
    "get_object"
  )
)
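For reference, paws can mint presigned URLs directly as well. A sketch, assuming the generate_presigned_url helper available in recent paws versions:

Code
# paws equivalent - presign a GET on the private object for 7 days
s3$generate_presigned_url(
  client_method = "get_object",
  params = list(Bucket = my_bucket_name, Key = "private/image.jpg"),
  expires_in = 604800
)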

Code
# this is the cmd line way
url_file_share <- s3_dir_ls(paste0(bucket_path, "/private"))
command <- "aws"
args <- c('s3', 'presign', url_file_share, '--expires-in', '604800')

working_directory = here::here() # we could just remove this from the function and use the current wd, but it's nice to have so we leave it

# loaded this function from another file - should put it in a functions file or package
sys_call()
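sys_call() is not defined in this post, so here is a minimal sketch of what such a helper might look like - a hypothetical stand-in (not the actual implementation), assuming the processx package:

Code
# hypothetical stand-in for the sys_call() helper - runs the command in the
# given working directory and returns stdout; assumes processx is installed
sys_call <- function(cmd = command, cmd_args = args, wd = working_directory) {
  processx::run(cmd, args = cmd_args, wd = wd)$stdout
}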

In order to rerun our post we need to delete the bucket. When we do, we use the s3fs package to do it.

Code
# Don't delete the bucket or the post won't render! ha
# Burn down the bucket 🔥. If we try `s3$delete_bucket(Bucket = my_bucket_name)` we get an error because the
# bucket is not empty.
# `s3fs::s3_bucket_delete(bucket_path)` works fine though.
s3fs::s3_bucket_delete(bucket_path)
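For completeness, the pure-paws teardown would be to empty the bucket and then delete it. A sketch, left commented out since the bucket is already gone:

Code
# the paws route - empty the bucket, then delete it
# objs <- s3$list_objects_v2(Bucket = my_bucket_name)$Contents
# for (o in objs) s3$delete_object(Bucket = my_bucket_name, Key = o$Key)
# s3$delete_bucket(Bucket = my_bucket_name)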