Avoid attaching IAM policy to AWSReservedSSO accounts #385

@rocoll

Description

Terraform Version Details

I run terraform from inside the viya4-iac-aws container.

Command definition:

$ alias terraform

alias terraform='docker container run --rm --group-add root --user 1000:977 -v /home/cloud-user/.aws:/.aws -v /home/cloud-user/.ssh:/.ssh -v /home/cloud-user/viya4-iac-aws:/workspace --entrypoint terraform viya4-iac-aws'

TF version:

$ terraform version

Terraform v1.10.5
on linux_amd64
+ provider registry.terraform.io/hashicorp/aws v5.100.0
+ provider registry.terraform.io/hashicorp/cloudinit v2.3.7
+ provider registry.terraform.io/hashicorp/external v2.3.5
+ provider registry.terraform.io/hashicorp/kubernetes v2.38.0
+ provider registry.terraform.io/hashicorp/local v2.5.3
+ provider registry.terraform.io/hashicorp/null v3.2.4
+ provider registry.terraform.io/hashicorp/random v3.7.2
+ provider registry.terraform.io/hashicorp/time v0.13.1
+ provider registry.terraform.io/hashicorp/tls v4.1.0

Terraform Variable File Details

Using the IAC parameters to provision Amazon FSx for NetApp ONTAP file system resources:

# REQUIRED VARIABLES
# Necessary for use by the IAC
# --------------------------------------
# - Prefix is used for naming resources for easy identification
# - Location is the geo region where resources will be placed
#
prefix                                  = "$MY_PREFIX"
location                                = "$MY_REGION" 

# ACCESS, IDENTITY, and AUTHENTICATION
# Who is doing what where
# --------------------------------------
#
aws_profile                             = "default"            # or whatever you named it
ssh_public_key                          = "~/.ssh/id_rsa.pub"
create_static_kubeconfig                = true

# CIDR
# Specify public access CIDR to allow ingress traffic to the EKS cluster
# --------------------------------------
# - Define access from the RACE VMware and RACE Azure client networks
#
default_public_access_cidrs         = ["149.173.0.0/16", "52.226.102.80/32", "52.226.102.81/32"]

# TAGS
# Optional metadata associated with AWS resources
# --------------------------------------
# - Resourceowner makes it easy to find associated resources
# - Project_Name and GEL_Project are for tracking
# - Chronos (old) and Smart Parking (new) are SAS IT programs that automatically shut down resources
# - GEL Smart Parking Exemption: RITM0988495
#
tags = {
  "resourceowner"          = "$MY_PREFIX"
  "project_name"           = "PSGEL297"
  "gel_project"            = "PSGEL297"
  "disable_chronos"        = "True"
  "smart_parking_disabled" = "True"
}

# EXTERNAL POSTGRES SERVER
# --------------------------------------
# - if defined, creates an external Postgres server in AWS; otherwise the internal Crunchy Postgres is used
#
#postgres_servers = {
#  default = {},
#}

## Cluster config
kubernetes_version                      = "1.31"

default_nodepool_node_count             = 2
default_nodepool_vm_type                = "m7i-flex.2xlarge"
default_nodepool_ami_type               = "AL2023_x86_64_STANDARD"
default_nodepool_custom_data            = ""

## Storage
storage_type                                   = "ha"
storage_type_backend                           = "ontap"
aws_fsx_ontap_deployment_type                  = "SINGLE_AZ_1"     # or MULTI_AZ_1
aws_fsx_ontap_file_system_storage_capacity     = "1024"            # up to 196608
aws_fsx_ontap_file_system_throughput_capacity  = "512"             # up to 4096
aws_fsx_ontap_fsxadmin_password                = "ThePowerToKnow123!"

## Cluster Node Pools config
node_pools = {
  cas = {
    "vm_type"                              = "r6idn.2xlarge"
    "cpu_type"                             = "AL2023_x86_64_STANDARD"
    "os_disk_type"                         = "gp3"
    "os_disk_size"                         = 200
    "os_disk_iops"                         = 0
    "min_nodes"                            = 4
    "max_nodes"                            = 5
    "node_taints"                          = ["workload.sas.com/class=cas:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class" = "cas"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  },
  compute = {
    "vm_type"                              = "r6idn.4xlarge"
    "cpu_type"                             = "AL2023_x86_64_STANDARD"
    "os_disk_type"                         = "gp3"
    "os_disk_size"                         = 200
    "os_disk_iops"                         = 0
    "min_nodes"                            = 1
    "max_nodes"                            = 5
    "node_taints"                          = ["workload.sas.com/class=compute:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class"        = "compute"
      "launcher.sas.com/prepullImage" = "sas-programming-environment"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  },
  stateless = {
    "vm_type"                              = "m7i-flex.4xlarge"
    "cpu_type"                             = "AL2023_x86_64_STANDARD"
    "os_disk_type"                         = "gp3"
    "os_disk_size"                         = 200
    "os_disk_iops"                         = 0
    "min_nodes"                            = 1
    "max_nodes"                            = 5
    "node_taints"                          = ["workload.sas.com/class=stateless:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class" = "stateless"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  },
  stateful = {
    "vm_type"                              = "m7i-flex.4xlarge"
    "cpu_type"                             = "AL2023_x86_64_STANDARD"
    "os_disk_type"                         = "gp3"
    "os_disk_size"                         = 200
    "os_disk_iops"                         = 0
    "min_nodes"                            = 1
    "max_nodes"                            = 3
    "node_taints"                          = ["workload.sas.com/class=stateful:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class" = "stateful"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  }
}

# Jump Server
create_jump_vm                        = true
jump_vm_admin                         = "jumpuser"
jump_vm_type                          = "t3.small"

# NFS Server
# required ONLY when storage_type is "standard" to create NFS Server VM
create_nfs_public_ip                  = false
nfs_vm_admin                          = "nfsuser"
nfs_vm_type                           = "m7i-flex.xlarge"

Steps to Reproduce

Configure the Terraform variables file with the storage_ and aws_fsx_ontap_ parameters for FSx storage, then run the usual terraform plan and terraform apply steps.

Expected Behavior

The "terraform apply" step completes successfully, including the provisioning of a new file system in AWS FSx.

Actual Behavior

The "terraform apply" step fails with error:

Error: attaching IAM Policy to IAM Role: operation error IAM: AttachRolePolicy, 
https response error StatusCode: 400, RequestID: xxx, UnmodifiableEntity: 
Cannot perform the operation on the protected role 'AWSReservedSSO_xxx' - 
this role is only modifiable by AWS

Additional Context

At SAS (and likely at most other IT organizations), we avoid AWS IAM users and rely on protected, managed roles controlled by AWS IAM Identity Center. These roles' names typically begin with "AWSReservedSSO_", and new IAM policies cannot be attached to them.

The current IAC implementation assumes that new policies can be attached to the active user/role. That assumption will hold in fewer and fewer environments over time, so a more flexible approach is warranted.
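
For reference, the caller's identity can be inspected from the STS caller ARN. The following is a minimal sketch of how an SSO role might be detected, assuming the standard aws_caller_identity data source; the actual derivation of var.is_user and var.iam_role_name in viya4-iac-aws may differ:

data "aws_caller_identity" "current" {}

locals {
  caller_arn = data.aws_caller_identity.current.arn

  # An assumed-role ARN contains ":assumed-role/"; an IAM user ARN does not
  caller_is_role = strcontains(local.caller_arn, ":assumed-role/") # requires Terraform >= 1.5

  # The second path segment of an assumed-role ARN is the role name, e.g.
  # arn:aws:sts::123456789012:assumed-role/AWSReservedSSO_Admin_abc123/user@example.com
  caller_role_name = local.caller_is_role ? split("/", local.caller_arn)[1] : ""
  caller_is_sso    = startswith(local.caller_role_name, "AWSReservedSSO_")
}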

In viya4-iac-aws/modules/aws_fsx_ontap.tf:

  • Delete these lines near the end of the file:

    # IAM User Policy Attachment resource
    resource "aws_iam_user_policy_attachment" "attachment" {
      count      = var.is_user ? 1 : 0          # Attach policy only if var.is_user is true
      user       = var.iam_user_name            # IAM user name to attach the policy
      policy_arn = aws_iam_policy.fsx_ontap.arn # ARN of the FSx ONTAP policy
    }
    
    # IAM Role Policy Attachment resource
    resource "aws_iam_role_policy_attachment" "attachment" {
      count      = var.is_user ? 0 : 1          # Attach policy only if var.is_user is false
      role       = var.iam_role_name            # IAM role name to attach the policy
      policy_arn = aws_iam_policy.fsx_ontap.arn # ARN of the FSx ONTAP policy
    }
  • Replace them with:

    # Local variables for logic evaluation
    locals {
      # Check if the role is protected (AWS SSO or service-linked role)
      is_protected_role = !var.is_user && (
        startswith(var.iam_role_name, "AWSReservedSSO_") ||
        startswith(var.iam_role_name, "aws-service-role/")
      )
      
      # Determine if we should attach policy to user
      attach_to_user = var.is_user ? 1 : 0
      
      # Determine if we should attach policy to role (only if not protected)
      attach_to_role = !var.is_user && !local.is_protected_role ? 1 : 0
    }
    
    resource "aws_iam_user_policy_attachment" "attachment" {
      count      = local.attach_to_user           # Attach policy only if caller is a user
      user       = var.iam_user_name              # IAM user name to attach the policy
      policy_arn = aws_iam_policy.fsx_ontap.arn   # ARN of the FSx ONTAP policy
    }
    
    # IAM Role Policy Attachment resource - Skip for AWS SSO and service-linked roles
    resource "aws_iam_role_policy_attachment" "role_attachment" {
      count      = local.attach_to_role           # Only attach if role is not protected
      role       = var.iam_role_name              # IAM role name to attach the policy
      policy_arn = aws_iam_policy.fsx_ontap.arn   # ARN of the FSx ONTAP policy
    }
    
    # Output information about policy attachment
    output "policy_attachment_info" {
      description = "Information about FSx ONTAP policy attachment"
      value = local.is_protected_role ? {
        status = "Policy attachment skipped for protected role: ${var.iam_role_name}"
        message = "AWS SSO and service-linked roles cannot be modified. The role already has necessary FSx permissions through existing policies."
        policy_arn = aws_iam_policy.fsx_ontap.arn
        protected_role = true
      } : {
        status = "Policy attached to ${var.is_user ? "user" : "role"}: ${var.is_user ? var.iam_user_name : var.iam_role_name}"
        policy_arn = aws_iam_policy.fsx_ontap.arn
        protected_role = false
      }
    }
    
    # Debug output to help troubleshoot
    output "debug_info" {
      description = "Debug information about the FSx ONTAP module logic"
      value = {
        is_user = var.is_user
        iam_role_name = var.iam_role_name
        iam_user_name = var.iam_user_name
        is_protected_role = local.is_protected_role
        attach_to_user = local.attach_to_user
        attach_to_role = local.attach_to_role
        startswith_sso = startswith(var.iam_role_name, "AWSReservedSSO_")
        startswith_service = startswith(var.iam_role_name, "aws-service-role/")
      }
    }
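
After terraform apply, terraform output policy_attachment_info (or debug_info) reports whether the attachment was performed or skipped. Note that because these outputs are declared inside a module, the root configuration would need to re-export them for terraform output to display them.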

The fix identifies protected roles using these criteria:

Role Type              Detection Logic                               Action
AWS SSO roles          startswith(role_name, "AWSReservedSSO_")      Skip policy attachment
Service-linked roles   startswith(role_name, "aws-service-role/")    Skip policy attachment
Standard IAM roles     does not match the patterns above             Attach policy normally
IAM users              var.is_user == true                           Attach to user (unchanged)
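
As a worked example, with var.is_user = false and a hypothetical role name of "AWSReservedSSO_AdministratorAccess_abc123", is_protected_role evaluates to true and attach_to_role to 0, so Terraform never issues the failing AttachRolePolicy call.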

For the fix to work properly, AWSReservedSSO roles must have FSx permissions through their existing IAM policies. Ensure the following permissions are included in the role's SSO permission set or attached IAM policies:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "fsxFileSystemOwn",
      "Effect": "Allow",
      "Action": [
        "fsx:CreateFileSystem",
        "fsx:UpdateFileSystem", 
        "fsx:UntagResource",
        "fsx:CreateBackup",
        "fsx:TagResource",
        "fsx:DeleteFileSystem"
      ],
      "Resource": "*"
    },
    {
      "Sid": "fsxFileSystemAll",
      "Effect": "Allow", 
      "Action": [
        "fsx:CreateVolume",
        "fsx:DeleteStorageVirtualMachine",
        "fsx:UpdateVolume",
        "fsx:CreateStorageVirtualMachine",
        "fsx:DeleteVolume"
      ],
      "Resource": "arn:aws:fsx:*:*:*/*"
    },
    {
      "Sid": "fsxVolumeOwn",
      "Effect": "Allow",
      "Action": [
        "fsx:CreateVolume",
        "fsx:UpdateVolume", 
        "fsx:DeleteVolume"
      ],
      "Resource": "arn:aws:fsx:*:*:volume/*"
    },
    {
      "Sid": "fsxDescribeAll",
      "Effect": "Allow",
      "Action": [
        "fsx:DescribeFileSystems",
        "fsx:DescribeVolumes",
        "fsx:DescribeStorageVirtualMachines",
        "fsx:UntagResource",
        "fsx:TagResource"
      ],
      "Resource": "*"
    },
    {
      "Sid": "fsxListTagsAll", 
      "Effect": "Allow",
      "Action": "fsx:ListTagsForResource",
      "Resource": "arn:aws:fsx:*:*:*/*"
    }
  ]
}

Consider adding these to the list of existing permissions in the IAM Policy for viya4-iac-aws.
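
If the permission set is managed with Terraform, the snippet below sketches one way to attach these permissions as an inline policy using the AWS provider's Identity Center (SSO Admin) resources. This would be run by an Identity Center administrator, separately from viya4-iac-aws; the permission set name "ViyaAdmins" and the policy file path are hypothetical:

data "aws_ssoadmin_instances" "this" {}

data "aws_ssoadmin_permission_set" "viya" {
  instance_arn = tolist(data.aws_ssoadmin_instances.this.arns)[0]
  name         = "ViyaAdmins" # hypothetical permission set name
}

resource "aws_ssoadmin_permission_set_inline_policy" "fsx_ontap" {
  instance_arn       = tolist(data.aws_ssoadmin_instances.this.arns)[0]
  permission_set_arn = data.aws_ssoadmin_permission_set.viya.arn
  # The JSON policy document shown above, saved alongside this configuration
  inline_policy      = file("${path.module}/fsx-ontap-policy.json")
}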

The suggested change is designed to:

  • introduce no breaking change for existing deployments that use IAM users or standard roles
  • enhance security by aligning with AWS's role-protection mechanisms

References

No response

Code of Conduct

  • I agree to follow this project's Code of Conduct
