In this example we are going to set up an asynchronous, event-driven architecture with Terraform. It runs as a scheduled operation and uses a few AWS services: EventBridge, SQS, Lambda, SNS and CloudWatch. If something goes wrong with the operation, an alarm message is sent to an email address. If you want, you can send the alarm to more practical tools such as Slack, but let's stick with the email solution to keep the post shorter. Our Lambda function is a Golang application that fakes successful and failed message consumption for demonstration purposes.


How it works



Notes




Structure


├── cmd
│   └── account_balance_report
│       └── main.go
├── .gitignore
├── go.mod
├── go.sum
└── terraform
    └── development
        ├── alarm.tf
        ├── lambda.tf
        ├── main.tf
        ├── scheduler.tf
        └── sqs.tf

.gitignore


.terraform/
terraform.tfstate*

tmp/
bin/

main.go


package main

import (
"context"
"fmt"
"time"

"github.com/aws/aws-lambda-go/events"
"github.com/aws/aws-lambda-go/lambda"
)

// main registers handler with the AWS Lambda Go runtime via lambda.Start.
func main() {
lambda.Start(handler)
}

// handler consumes the SQS records delivered to the Lambda function.
//
// It is a demonstration stub: for each record it samples the nanosecond
// component of the current UTC time and fails when that value is even,
// otherwise it logs the record body as successfully consumed.
//
// Every record in the batch is visited, so a batch larger than one is never
// acknowledged with unprocessed records in it. An empty event returns nil.
// A non-nil return fails the whole invocation, which leaves the batch on the
// queue for redelivery.
func handler(ctx context.Context, event events.SQSEvent) error {
	for _, record := range event.Records {
		// Debugger: Purposely fail message consumption.
		if nanos := time.Now().UTC().Nanosecond(); nanos%2 == 0 {
			return fmt.Errorf("error @ %d: %+v", nanos, record.Body)
		}

		// Otherwise, message is consumed just fine. The trailing newline keeps
		// each success entry on its own log line.
		fmt.Printf("success: %+v\n", record.Body)
	}

	return nil
}

main.tf


# Pins the Terraform CLI version and the providers this configuration uses.
terraform {
  required_version = "~> 1.4.4"

  required_providers {
    # Used by the archive_file data source that zips the Lambda binary.
    archive = {
      version = "~> 2.3.0"
      source  = "hashicorp/archive"
    }

    # Provider for all AWS resources in this configuration.
    aws = {
      version = "~> 4.41.0"
      source  = "hashicorp/aws"
    }

    # Used by the null_resource that runs the local Go build.
    null = {
      version = "~> 3.2.1"
      source  = "hashicorp/null"
    }
  }
}

# AWS provider: uses the "development" named profile in eu-west-1.
provider "aws" {
  region  = "eu-west-1"
  profile = "development"
}

scheduler.tf


# Schedule that delivers a message to the source SQS queue every two minutes.
resource "aws_scheduler_schedule" "periodic_account_balance_report_scheduler" {
  name                = "periodic-account-balance-report"
  schedule_expression = "rate(2 minutes)"

  # No flexible delivery window is configured.
  flexible_time_window {
    mode = "OFF"
  }

  target {
    role_arn = aws_iam_role.periodic_account_balance_report_scheduler_role.arn
    arn      = aws_sqs_queue.periodic_account_balance_report_source.arn

    # The payload is an object with a single "Body" key whose value is itself
    # a JSON-encoded string.
    input = jsonencode({
      Body = "{\"message\":\"Time to generate a new report!\"}"
    })
  }
}

# Role that the scheduler service assumes when it delivers to the target.
resource "aws_iam_role" "periodic_account_balance_report_scheduler_role" {
  name = "periodic-account-balance-report-scheduler"

  # Trust policy: only scheduler.amazonaws.com may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "scheduler.amazonaws.com"
        }
      },
    ]
  })

  # Permissions come from the scheduler policy defined in this file.
  managed_policy_arns = [
    aws_iam_policy.periodic_account_balance_report_scheduler_policy.arn,
  ]
}

# Allows sending messages to the source queue, and nothing else.
resource "aws_iam_policy" "periodic_account_balance_report_scheduler_policy" {
  name = "periodic-account-balance-report-scheduler"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = "sqs:SendMessage"
        Resource = aws_sqs_queue.periodic_account_balance_report_source.arn
      },
    ]
  })
}

sqs.tf


# Source queue: receives the scheduled messages and feeds the Lambda function.
resource "aws_sqs_queue" "periodic_account_balance_report_source" {
  name = "periodic-account-balance-report-source"

  max_message_size          = 2048
  message_retention_seconds = 600
  delay_seconds             = 0
  receive_wait_time_seconds = 10

  # The consuming Lambda function has a 30 second timeout. AWS recommends a
  # visibility timeout of at least six times the function timeout for SQS
  # event sources, so a message is not redelivered while an invocation (or a
  # throttled retry of it) may still be in flight.
  visibility_timeout_seconds = 180

  # After two receives the message is moved to the dead-letter queue.
  redrive_policy = jsonencode({
    deadLetterTargetArn = aws_sqs_queue.periodic_account_balance_report_deadletter.arn
    maxReceiveCount     = 2
  })
}

# Dead-letter queue: the redrive_policy of the source queue points here.
# Only the name is set; every other queue attribute is left unset.
resource "aws_sqs_queue" "periodic_account_balance_report_deadletter" {
name = "periodic-account-balance-report-deadletter"
}

# Restricts which queues may use the dead-letter queue as their redrive
# target: only the source queue, matched by ARN.
resource "aws_sqs_queue_redrive_allow_policy" "periodic_account_balance_report_deadletter_policy" {
  queue_url = aws_sqs_queue.periodic_account_balance_report_deadletter.id

  redrive_allow_policy = jsonencode({
    sourceQueueArns = [
      aws_sqs_queue.periodic_account_balance_report_source.arn,
    ]
    redrivePermission = "byQueue"
  })
}

lambda.tf


# Shared values for building and deploying the Lambda function.
locals {
  # Used as the function name, the handler and the log group suffix.
  lambda_function_name = "account_balance_report"

  # Build input (Go package pattern) and build outputs, all relative to this
  # module's directory.
  go_source_path = "${path.module}/../../cmd/account_balance_report/..."
  go_zip_path    = "${path.module}/../../tmp/account_balance_report.zip"
  go_binary_path = "${path.module}/../../bin/account_balance_report"
}

# Builds the Go Lambda binary for linux/amd64 with cgo disabled, writing it to
# local.go_binary_path via a local-exec provisioner.
# No `triggers` are declared, so the build command only runs when this
# resource is created or replaced.
resource "null_resource" "account_balance_report_go_binary_file" {
provisioner "local-exec" {
command = "GOOS=linux GOARCH=amd64 CGO_ENABLED=0 GOFLAGS=-trimpath go build -mod=readonly -ldflags='-s -w' -o ${local.go_binary_path} ${local.go_source_path}"
}
}

# Zips the compiled binary into the deployment package for the function.
data "archive_file" "account_balance_report_go_zip_file" {
  # The binary must exist before it can be archived, hence the explicit
  # dependency on the build step.
  depends_on = [null_resource.account_balance_report_go_binary_file]

  type        = "zip"
  output_path = local.go_zip_path
  source_file = local.go_binary_path
}

# The Lambda function that consumes messages from the source queue.
resource "aws_lambda_function" "account_balance_report_lambda" {
  # The log group is declared as a dependency so it exists before the function.
  depends_on = [aws_cloudwatch_log_group.account_balance_report_lambda_log_group]

  function_name = local.lambda_function_name
  role          = aws_iam_role.account_balance_report_lambda_executor_role.arn

  # Deployment package: the zip produced by the archive_file data source. The
  # hash changes whenever the archive content changes.
  package_type     = "Zip"
  filename         = local.go_zip_path
  source_code_hash = data.archive_file.account_balance_report_go_zip_file.output_base64sha256

  # The handler is set to the same value as the function name, which is also
  # the name of the built binary.
  # NOTE(review): confirm the go1.x runtime is still supported in the target
  # account before relying on it.
  runtime = "go1.x"
  handler = local.lambda_function_name

  memory_size = 128
  timeout     = 30
}

# Execution role for the Lambda function.
resource "aws_iam_role" "account_balance_report_lambda_executor_role" {
  name = "account-balance-report-lambda-executor"

  # Trust policy: only lambda.amazonaws.com may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "lambda.amazonaws.com"
        }
      },
    ]
  })

  # Permissions come from the Lambda policy defined in this file.
  managed_policy_arns = [
    aws_iam_policy.account_balance_report_lambda_policy.arn,
  ]
}

# Permissions for the Lambda execution role. Queue access and logging are
# separate statements so each action list is paired only with the resource it
# applies to.
resource "aws_iam_policy" "account_balance_report_lambda_policy" {
  name = "account-balance-report-lambda-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Consume messages from the source queue.
      {
        Effect = "Allow"
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes",
        ]
        Resource = aws_sqs_queue.periodic_account_balance_report_source.arn
      },
      # Write function logs to CloudWatch Logs.
      {
        Effect = "Allow"
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents",
        ]
        Resource = "arn:aws:logs:*:*:*"
      },
    ]
  })
}

# Log group for the function, named /aws/lambda/<function name>, with logs
# kept for 30 days.
resource "aws_cloudwatch_log_group" "account_balance_report_lambda_log_group" {
  retention_in_days = 30
  name              = "/aws/lambda/${local.lambda_function_name}"
}

# Connects the source queue to the function, one message per invocation.
resource "aws_lambda_event_source_mapping" "account_balance_report_lambda_source_mapping" {
  event_source_arn = aws_sqs_queue.periodic_account_balance_report_source.arn
  function_name    = aws_lambda_function.account_balance_report_lambda.arn
  batch_size       = 1
}

alarm.tf


# SNS topic referenced by the CloudWatch alarm's alarm_actions and by the
# email subscription in this file.
resource "aws_sns_topic" "periodic_account_balance_report_alarm" {
name = "periodic-account-balance-report-alarm"
}

# Email subscription on the alarm topic. The endpoint is a placeholder address.
resource "aws_sns_topic_subscription" "periodic_account_balance_report_alarm_email" {
  protocol  = "email"
  endpoint  = "me@example.com"
  topic_arn = aws_sns_topic.periodic_account_balance_report_alarm.arn
}

# Alarm on the dead-letter queue: goes to ALARM when the RATE() of its visible
# message count is above zero for a single one-minute datapoint, and notifies
# the SNS alarm topic.
resource "aws_cloudwatch_metric_alarm" "periodic_account_balance_report_alarm_trigger" {
  alarm_name        = "periodic-account-balance-report-alarm-trigger"
  alarm_description = "Periodic account balance report generation has failed"
  alarm_actions     = [aws_sns_topic.periodic_account_balance_report_alarm.arn]

  # Condition: expression value > 0 in 1 out of 1 evaluation periods. Missing
  # datapoints are treated as not breaching.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  datapoints_to_alarm = 1
  evaluation_periods  = 1
  treat_missing_data  = "notBreaching"

  # Raw input: the dead-letter queue's visible message count, sampled as the
  # per-minute minimum.
  metric_query {
    id    = "metric"
    label = "sqs"

    metric {
      namespace   = "AWS/SQS"
      metric_name = "ApproximateNumberOfMessagesVisible"
      stat        = "Minimum"
      unit        = "Count"
      period      = "60"

      dimensions = {
        QueueName = aws_sqs_queue.periodic_account_balance_report_deadletter.name
      }
    }
  }

  # The value the alarm evaluates: RATE() applied to the raw metric above.
  # Only this query has return_data set.
  metric_query {
    id          = "query"
    label       = "new-message"
    expression  = "RATE(metric)"
    return_data = "true"
  }
}

AWS config


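I am provisioning AWS in my development account. The first snippet below is my AWS CLI config file (with the named profiles) and the second is the matching credentials file.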


[default]
region = eu-west-1
output = json

[profile development]
region = eu-west-1
source_profile = default
role_session_name = github-actions
role_arn = arn:aws:iam::1234567890:role/ci-provisioner

... other profiles

[default]
aws_access_key_id = github-actions-id
aws_secret_access_key = github-actions-secret

Provision


me:~/aws/terraform/development$ terraform apply \
-replace="null_resource.account_balance_report_go_binary_file" \
-replace="archive_file.account_balance_report_go_zip_file" \
-replace="aws_lambda_function.account_balance_report_lambda"

Example alert email


Subject


ALARM: "periodic-account-balance-report-alarm-trigger" in EU (Ireland)

From


AWS Notifications no-reply@sns.amazonaws.com

Body


You are receiving this email because your Amazon CloudWatch Alarm "periodic-account-balance-report-alarm-trigger" in the EU (Ireland) region has entered the ALARM state, because "Threshold Crossed: 1 out of the last 1 datapoints [0.016666666666666666 (28/06/23 20:49:00)] was greater than the threshold (0.0) (minimum 1 datapoint for OK -> ALARM transition)." at "Wednesday 28 June, 2023 20:50:29 UTC".

View this alarm in the AWS Management Console:
https://eu-west-1.console.aws.amazon.com/cloudwatch/deeplink.js?region=eu-west-1#alarmsV2:alarm/periodic-account-balance-report-alarm-trigger

Alarm Details:
- Name: periodic-account-balance-report-alarm-trigger
- Description: Periodic account balance report generation has failed
- State Change: OK -> ALARM
- Reason for State Change: Threshold Crossed: 1 out of the last 1 datapoints [0.016666666666666666 (28/06/23 20:49:00)] was greater than the threshold (0.0) (minimum 1 datapoint for OK -> ALARM transition).
- Timestamp: Wednesday 28 June, 2023 20:50:29 UTC
- AWS Account: 1234567890
- Alarm Arn: arn:aws:cloudwatch:eu-west-1:1234567890:alarm:periodic-account-balance-report-alarm-trigger

Threshold:
- The alarm is in the ALARM state when the metric is GreaterThanThreshold 0.0 for at least 1 of the last 1 period(s) of 60 seconds.

Monitored Metrics:
- MetricExpression: RATE(metric)
- MetricLabel: new-message

State Change Actions:
- OK:
- ALARM: [arn:aws:sns:eu-west-1:1234567890:periodic-account-balance-report-alarm]
- INSUFFICIENT_DATA:

Screenshots


EventBridge





SQS



Lambda







CloudWatch






SNS