Resource: awsKinesisFirehoseDeliveryStream
Provides a Kinesis Firehose Delivery Stream resource. Amazon Kinesis Firehose is a fully managed, elastic service to easily deliver real-time data streams to destinations such as Amazon S3 and Amazon Redshift.
For more details, see the Amazon Kinesis Firehose Documentation.
Example Usage
Extended S3 Destination
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Destination bucket for the extended_s3 delivery stream.
const awsS3BucketBucket = new aws.s3Bucket.S3Bucket(this, "bucket", {
bucket: "tf-test-bucket",
});
// Keep the bucket private; ACLs are managed as a separate resource.
new aws.s3BucketAcl.S3BucketAcl(this, "bucket_acl", {
acl: "private",
bucket: awsS3BucketBucket.id,
});
// Trust policy letting the Firehose service assume the delivery role.
const dataAwsIamPolicyDocumentFirehoseAssumeRole =
new aws.dataAwsIamPolicyDocument.DataAwsIamPolicyDocument(
this,
"firehose_assume_role",
{
statement: [
{
actions: ["sts:AssumeRole"],
effect: "Allow",
principals: [
{
identifiers: ["firehose.amazonaws.com"],
type: "Service",
},
],
},
],
}
);
// Trust policy letting the Lambda service assume the processor role.
const dataAwsIamPolicyDocumentLambdaAssumeRole =
new aws.dataAwsIamPolicyDocument.DataAwsIamPolicyDocument(
this,
"lambda_assume_role",
{
statement: [
{
actions: ["sts:AssumeRole"],
effect: "Allow",
principals: [
{
identifiers: ["lambda.amazonaws.com"],
type: "Service",
},
],
},
],
}
);
// IAM role Firehose assumes when writing to the destination bucket.
const awsIamRoleFirehoseRole = new aws.iamRole.IamRole(this, "firehose_role", {
assumeRolePolicy: dataAwsIamPolicyDocumentFirehoseAssumeRole.json,
name: "firehose_test_role",
});
// IAM role for the record-transformation Lambda function.
const awsIamRoleLambdaIam = new aws.iamRole.IamRole(this, "lambda_iam", {
assumeRolePolicy: dataAwsIamPolicyDocumentLambdaAssumeRole.json,
name: "lambda_iam",
});
// Lambda function referenced by the stream's processingConfiguration.
const awsLambdaFunctionLambdaProcessor = new aws.lambdaFunction.LambdaFunction(
this,
"lambda_processor",
{
filename: "lambda.zip",
functionName: "firehose_lambda_processor",
handler: "exports.handler",
role: awsIamRoleLambdaIam.arn,
runtime: "nodejs16.x",
}
);
// Delivery stream: extended_s3 destination with a Lambda processor.
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"extended_s3_stream",
{
destination: "extended_s3",
extendedS3Configuration: {
bucketArn: awsS3BucketBucket.arn,
processingConfiguration: {
enabled: "true",
processors: [
{
parameters: [
{
parameterName: "LambdaArn",
// The Lambda ARN must carry a version qualifier (here $LATEST);
// see the parameterValue note in the Argument Reference.
parameterValue: `\${${awsLambdaFunctionLambdaProcessor.arn}}:\$LATEST`,
},
],
type: "Lambda",
},
],
},
roleArn: awsIamRoleFirehoseRole.arn,
},
name: "terraform-kinesis-firehose-extended-s3-test-stream",
}
);
Extended S3 Destination with dynamic partitioning
These examples use built-in Firehose functionality, rather than requiring a lambda.
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Delivery stream using Firehose dynamic partitioning to route records
// into per-customer S3 prefixes; no Lambda processor is required.
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"extended_s3_stream",
{
destination: "extended_s3",
extendedS3Configuration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
// NOTE(review): 64 MB looks deliberate — dynamic partitioning imposes
// a larger minimum buffer than the default; confirm against AWS limits.
bufferSize: 64,
dynamicPartitioningConfiguration: {
enabled: "true",
},
// Failed records are written under a separate, timestamped error prefix.
errorOutputPrefix:
"errors/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}/",
// customer_id is produced by the MetadataExtraction processor below.
prefix:
"data/customer_id=!{partitionKeyFromQuery:customer_id}/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/",
processingConfiguration: {
enabled: "true",
processors: [
// Split aggregated records back into individual JSON records.
{
parameters: [
{
parameterName: "SubRecordType",
parameterValue: "JSON",
},
],
type: "RecordDeAggregation",
},
// Ensure each record ends with a delimiter before delivery.
{
type: "AppendDelimiterToRecord",
},
// Extract customer_id with a JQ query to use as the partition key.
{
parameters: [
{
parameterName: "JsonParsingEngine",
parameterValue: "JQ-1.6",
},
{
parameterName: "MetadataExtractionQuery",
parameterValue: "{customer_id:.customer_id}",
},
],
type: "MetadataExtraction",
},
],
},
roleArn: "${aws_iam_role.firehose_role.arn}",
},
name: "terraform-kinesis-firehose-extended-s3-test-stream",
}
);
S3 Destination (deprecated)
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Destination bucket for the (deprecated) plain "s3" destination example.
const awsS3BucketBucket = new aws.s3Bucket.S3Bucket(this, "bucket", {
bucket: "tf-test-bucket",
});
// Keep the bucket private.
new aws.s3BucketAcl.S3BucketAcl(this, "bucket_acl", {
acl: "private",
bucket: awsS3BucketBucket.id,
});
// Trust policy letting the Firehose service assume the delivery role.
const dataAwsIamPolicyDocumentAssumeRole =
new aws.dataAwsIamPolicyDocument.DataAwsIamPolicyDocument(
this,
"assume_role",
{
statement: [
{
actions: ["sts:AssumeRole"],
effect: "Allow",
principals: [
{
identifiers: ["firehose.amazonaws.com"],
type: "Service",
},
],
},
],
}
);
// IAM role Firehose assumes when writing to the bucket.
const awsIamRoleFirehoseRole = new aws.iamRole.IamRole(this, "firehose_role", {
assumeRolePolicy: dataAwsIamPolicyDocumentAssumeRole.json,
name: "firehose_test_role",
});
// Deprecated "s3" destination; prefer "extended_s3" for new streams
// (see the destination argument in the Argument Reference).
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"test_stream",
{
destination: "s3",
name: "terraform-kinesis-firehose-test-stream",
s3Configuration: {
bucketArn: awsS3BucketBucket.arn,
roleArn: awsIamRoleFirehoseRole.arn,
},
}
);
Redshift Destination
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Single-node Redshift cluster that receives the COPY loads.
const awsRedshiftClusterTestCluster = new aws.redshiftCluster.RedshiftCluster(
this,
"test_cluster",
{
clusterIdentifier: "tf-redshift-cluster",
clusterType: "single-node",
databaseName: "test",
masterPassword: "T3stPass",
masterUsername: "testuser",
nodeType: "dc1.large",
}
);
// Redshift destination: records are staged in the intermediate S3 bucket
// (redshiftConfiguration requires an s3Configuration block) and then
// loaded into the table with a COPY command.
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"test_stream",
{
destination: "redshift",
name: "terraform-kinesis-firehose-test-stream",
redshiftConfiguration: {
clusterJdbcurl: `jdbc:redshift://\${${awsRedshiftClusterTestCluster.endpoint}}/\${${awsRedshiftClusterTestCluster.databaseName}}`,
// Custom delimiter passed to the Redshift COPY command.
copyOptions: "delimiter '|'",
dataTableColumns: "test-col",
dataTableName: "test-table",
password: "T3stPass",
roleArn: "${aws_iam_role.firehose_role.arn}",
// Backup of source records, active because s3BackupMode is "Enabled".
s3BackupConfiguration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
bufferInterval: 300,
bufferSize: 15,
compressionFormat: "GZIP",
roleArn: "${aws_iam_role.firehose_role.arn}",
},
s3BackupMode: "Enabled",
username: "testuser",
},
// Intermediate bucket used before the COPY into Redshift.
s3Configuration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
bufferInterval: 400,
bufferSize: 10,
compressionFormat: "GZIP",
roleArn: "${aws_iam_role.firehose_role.arn}",
},
}
);
Elasticsearch Destination
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Elasticsearch domain the stream will index documents into.
const awsElasticsearchDomainTestCluster =
new aws.elasticsearchDomain.ElasticsearchDomain(this, "test_cluster", {
domainName: "firehose-es-test",
});
// Elasticsearch destination with a Lambda processing step; an
// s3Configuration block is required for non-S3 destinations.
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"test_stream",
{
destination: "elasticsearch",
elasticsearchConfiguration: {
domainArn: awsElasticsearchDomainTestCluster.arn,
indexName: "test",
processingConfiguration: {
enabled: "true",
processors: [
{
parameters: [
{
parameterName: "LambdaArn",
// The Lambda ARN carries an explicit version qualifier.
parameterValue:
"${aws_lambda_function.lambda_processor.arn}:$LATEST",
},
],
type: "Lambda",
},
],
},
roleArn: "${aws_iam_role.firehose_role.arn}",
typeName: "test",
},
name: "terraform-kinesis-firehose-test-stream",
s3Configuration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
bufferInterval: 400,
bufferSize: 10,
compressionFormat: "GZIP",
roleArn: "${aws_iam_role.firehose.arn}",
},
}
);
Elasticsearch Destination With VPC
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Elasticsearch domain deployed inside a VPC (two subnets, zone aware).
const awsElasticsearchDomainTestCluster =
new aws.elasticsearchDomain.ElasticsearchDomain(this, "test_cluster", {
clusterConfig: {
instanceCount: 2,
instanceType: "t2.small.elasticsearch",
zoneAwarenessEnabled: true,
},
domainName: "es-test",
ebsOptions: {
ebsEnabled: true,
volumeSize: 10,
},
vpcOptions: {
securityGroupIds: ["${aws_security_group.first.id}"],
subnetIds: ["${aws_subnet.first.id}", "${aws_subnet.second.id}"],
},
});
// Policy granting Firehose access to the domain plus the EC2 network
// permissions needed to create and manage interfaces inside the VPC.
const dataAwsIamPolicyDocumentFirehoseElasticsearch =
new aws.dataAwsIamPolicyDocument.DataAwsIamPolicyDocument(
this,
"firehose-elasticsearch",
{
statement: [
{
actions: ["es:*"],
effect: "Allow",
resources: [
awsElasticsearchDomainTestCluster.arn,
`\${${awsElasticsearchDomainTestCluster.arn}}/*`,
],
},
{
actions: [
"ec2:DescribeVpcs",
"ec2:DescribeVpcAttribute",
"ec2:DescribeSubnets",
"ec2:DescribeSecurityGroups",
"ec2:DescribeNetworkInterfaces",
"ec2:CreateNetworkInterface",
"ec2:CreateNetworkInterfacePermission",
"ec2:DeleteNetworkInterface",
],
effect: "Allow",
resources: ["*"],
},
],
}
);
// Attach the policy document to the firehose role.
const awsIamRolePolicyFirehoseElasticsearch =
new aws.iamRolePolicy.IamRolePolicy(this, "firehose-elasticsearch_2", {
name: "elasticsearch",
policy: dataAwsIamPolicyDocumentFirehoseElasticsearch.json,
role: "${aws_iam_role.firehose.id}",
});
/*This allows the Terraform resource name to match the original name. You can remove the call if you don't need them to match.*/
awsIamRolePolicyFirehoseElasticsearch.overrideLogicalId(
"firehose-elasticsearch"
);
// Delivery stream into the VPC-hosted Elasticsearch domain. It must wait
// for the role policy to be attached before creation, otherwise Firehose
// may lack the es:* / ec2:* permissions it needs at create time.
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"test",
{
// FIX: the CDKTF property is `dependsOn` (camelCase) and it accepts the
// construct itself (ITerraformDependable); the snake_case `depends_on`
// key is raw HCL and is not a valid property on the generated TypeScript
// config interface.
dependsOn: [awsIamRolePolicyFirehoseElasticsearch],
destination: "elasticsearch",
elasticsearchConfiguration: {
domainArn: awsElasticsearchDomainTestCluster.arn,
indexName: "test",
roleArn: "${aws_iam_role.firehose.arn}",
typeName: "test",
// vpcConfig places the stream's network interfaces in the same
// subnets / security groups as the domain above.
vpcConfig: {
roleArn: "${aws_iam_role.firehose.arn}",
securityGroupIds: ["${aws_security_group.first.id}"],
subnetIds: ["${aws_subnet.first.id}", "${aws_subnet.second.id}"],
},
},
name: "terraform-kinesis-firehose-es",
s3Configuration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
roleArn: "${aws_iam_role.firehose.arn}",
},
}
);
Splunk Destination
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Splunk destination via the HTTP Event Collector (HEC); failed events
// are backed up to the S3 bucket below (s3BackupMode: FailedEventsOnly).
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"test_stream",
{
destination: "splunk",
name: "terraform-kinesis-firehose-test-stream",
s3Configuration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
bufferInterval: 400,
bufferSize: 10,
compressionFormat: "GZIP",
roleArn: "${aws_iam_role.firehose.arn}",
},
splunkConfiguration: {
// Seconds to wait for Splunk's acknowledgment after sending data.
hecAcknowledgmentTimeout: 600,
hecEndpoint: "https://http-inputs-mydomain.splunkcloud.com:443",
hecEndpointType: "Event",
hecToken: "51D4DA16-C61B-4F5F-8EC7-ED4301342A4A",
s3BackupMode: "FailedEventsOnly",
},
}
);
HTTP Endpoint (e.g., New Relic) Destination
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Generic HTTP endpoint destination (here New Relic's Firehose API).
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
this,
"test_stream",
{
destination: "http_endpoint",
httpEndpointConfiguration: {
// Access key Firehose presents to authenticate with the endpoint.
accessKey: "my-key",
bufferingInterval: 600,
bufferingSize: 15,
name: "New Relic",
requestConfiguration: {
// Static key/value metadata attached to every delivery request.
commonAttributes: [
{
name: "testname",
value: "testvalue",
},
{
name: "testname2",
value: "testvalue2",
},
],
contentEncoding: "GZIP",
},
roleArn: "${aws_iam_role.firehose.arn}",
s3BackupMode: "FailedDataOnly",
url: "https://aws-api.newrelic.com/firehose/v1",
},
name: "terraform-kinesis-firehose-test-stream",
// Required alongside httpEndpointConfiguration (see Argument Reference).
s3Configuration: {
bucketArn: "${aws_s3_bucket.bucket.arn}",
bufferInterval: 400,
bufferSize: 10,
compressionFormat: "GZIP",
roleArn: "${aws_iam_role.firehose.arn}",
},
}
);
Argument Reference
The following arguments are supported:
name
- (Required) A name to identify the stream. This is unique to the AWS account and region the Stream is created in. When used for WAF logging, the name must be prefixed with awsWafLogs
. See AWS Documentation for more details.tags
- (Optional) A map of tags to assign to the resource. If configured with a providerdefaultTags
configuration block present, tags with matching keys will overwrite those defined at the provider-level.kinesisSourceConfiguration
- (Optional) Allows the ability to specify the kinesis stream that is used as the source of the firehose delivery stream.serverSideEncryption
- (Optional) Encrypt at rest options. Server-side encryption should not be enabled when a kinesis stream is configured as the source of the firehose delivery stream.destination
– (Required) This is the destination to where the data is delivered. The only options ares3
(Deprecated, useextendedS3
instead),extendedS3
,redshift
,elasticsearch
,splunk
, andhttpEndpoint
.s3Configuration
- (Optional) Required for non-S3 destinations. For S3 destination, useextendedS3Configuration
instead. Configuration options for the s3 destination (or the intermediate bucket if the destination is redshift). More details are given below.extendedS3Configuration
- (Optional, only Required whendestination
isextendedS3
) Enhanced configuration options for the s3 destination. More details are given below.redshiftConfiguration
- (Optional) Configuration options if redshift is the destination. UsingredshiftConfiguration
requires the user to also specify as3Configuration
block. More details are given below.elasticsearchConfiguration
- (Optional) Configuration options if elasticsearch is the destination. More details are given below.splunkConfiguration
- (Optional) Configuration options if splunk is the destination. More details are given below.httpEndpointConfiguration
- (Optional) Configuration options if http_endpoint is the destination. Requires the user to also specify as3Configuration
block. More details are given below.
The kinesisSourceConfiguration
object supports the following:
kinesisStreamArn
(Required) The kinesis stream used as the source of the firehose delivery stream.roleArn
(Required) The ARN of the role that provides access to the source Kinesis stream.
The serverSideEncryption
object supports the following:
enabled
- (Optional) Whether to enable encryption at rest. Default isfalse
.keyType
- (Optional) Type of encryption key. Default isAWS_OWNED_CMK
. Valid values areAWS_OWNED_CMK
andCUSTOMER_MANAGED_CMK
keyArn
- (Optional) Amazon Resource Name (ARN) of the encryption key. Required whenkeyType
isCUSTOMER_MANAGED_CMK
.
The s3Configuration
object supports the following:
~> NOTE: This configuration block is deprecated for the s3
destination.
roleArn
- (Required) The ARN of the AWS credentials.bucketArn
- (Required) The ARN of the S3 bucketprefix
- (Optional) The "YYYY/MM/DD/HH" time format prefix is automatically used for delivered S3 files. You can specify an extra prefix to be added in front of the time format prefix. Note that if the prefix ends with a slash, it appears as a folder in the S3 bucketbufferSize
- (Optional) Buffer incoming data to the specified size, in MBs, before delivering it to the destination. The default value is 5. We recommend setting SizeInMBs to a value greater than the amount of data you typically ingest into the delivery stream in 10 seconds. For example, if you typically ingest data at 1 MB/sec set SizeInMBs to be 10 MB or higher.bufferInterval
- (Optional) Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination. The default value is 300.compressionFormat
- (Optional) The compression format. If no value is specified, the default isuncompressed
. Other supported values aregzip
,zip
,snappy
, &HADOOP_SNAPPY
.errorOutputPrefix
- (Optional) Prefix added to failed records before writing them to S3. Not currently supported forredshift
destination. This prefix appears immediately following the bucket name. For information about how to specify this prefix, see Custom Prefixes for Amazon S3 Objects.kmsKeyArn
- (Optional) Specifies the KMS key ARN the stream will use to encrypt data. If not set, no encryption will be used.cloudwatchLoggingOptions
- (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below
The extendedS3Configuration
object supports the same fields from s3Configuration
as well as the following:
dataFormatConversionConfiguration
- (Optional) Nested argument for the serializer, deserializer, and schema for converting data from the JSON format to the Parquet or ORC format before writing it to Amazon S3. More details given below.processingConfiguration
- (Optional) The data processing configuration. More details are given below.s3BackupMode
- (Optional) The Amazon S3 backup mode. Valid values aredisabled
andenabled
. Default value isdisabled
.s3BackupConfiguration
- (Optional) The configuration for backup in Amazon S3. Required ifs3BackupMode
isenabled
. Supports the same fields ass3Configuration
object.dynamicPartitioningConfiguration
- (Optional) The configuration for dynamic partitioning. See Dynamic Partitioning Configuration below for more details. Required when using dynamic partitioning.
The redshiftConfiguration
object supports the following:
clusterJdbcurl
- (Required) The JDBC URL of the Redshift cluster.username
- (Required) The username that the firehose delivery stream will assume. It is strongly recommended that the username and password provided is used exclusively for Amazon Kinesis Firehose purposes, and that the permissions for the account are restricted for Amazon Redshift INSERT permissions.password
- (Required) The password for the username above.retryDuration
- (Optional) The length of time during which Firehose retries delivery after a failure, starting from the initial request and including the first attempt. The default value is 3600 seconds (60 minutes). Firehose does not retry if the value of DurationInSeconds is 0 (zero) or if the first delivery attempt takes longer than the current value.roleArn
- (Required) The arn of the role the stream assumes.s3BackupMode
- (Optional) The Amazon S3 backup mode. Valid values aredisabled
andenabled
. Default value isdisabled
.s3BackupConfiguration
- (Optional) The configuration for backup in Amazon S3. Required ifs3BackupMode
isenabled
. Supports the same fields ass3Configuration
object.dataTableName
- (Required) The name of the table in the redshift cluster that the s3 bucket will copy to.copyOptions
- (Optional) Copy options for copying the data from the s3 intermediate bucket into redshift, for example to change the default delimiter. For valid values, see the AWS documentationdataTableColumns
- (Optional) The data table columns that will be targeted by the copy command.cloudwatchLoggingOptions
- (Optional) The CloudWatch Logging Options for the delivery stream. More details are given belowprocessingConfiguration
- (Optional) The data processing configuration. More details are given below.
The elasticsearchConfiguration
object supports the following:
bufferingInterval
- (Optional) Buffer incoming data for the specified period of time, in seconds between 60 to 900, before delivering it to the destination. The default value is 300s.bufferingSize
- (Optional) Buffer incoming data to the specified size, in MBs between 1 to 100, before delivering it to the destination. The default value is 5MB.domainArn
- (Optional) The ARN of the Amazon ES domain. The pattern needs to bearn:.*
. Conflicts withclusterEndpoint
.clusterEndpoint
- (Optional) The endpoint to use when communicating with the cluster. Conflicts withdomainArn
.indexName
- (Required) The Elasticsearch index name.indexRotationPeriod
- (Optional) The Elasticsearch index rotation period. Index rotation appends a timestamp to the IndexName to facilitate expiration of old data. Valid values arenoRotation
,oneHour
,oneDay
,oneWeek
, andoneMonth
. The default value isoneDay
.retryDuration
- (Optional) After an initial failure to deliver to Amazon Elasticsearch, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0.roleArn
- (Required) The ARN of the IAM role to be assumed by Firehose for calling the Amazon ES Configuration API and for indexing documents. The IAM role must have permission fordescribeElasticsearchDomain
,describeElasticsearchDomains
, anddescribeElasticsearchDomainConfig
. The pattern needs to bearn:.*
.s3BackupMode
- (Optional) Defines how documents should be delivered to Amazon S3. Valid values arefailedDocumentsOnly
andallDocuments
. Default value isfailedDocumentsOnly
.typeName
- (Optional) The Elasticsearch type name with maximum length of 100 characters.cloudwatchLoggingOptions
- (Optional) The CloudWatch Logging Options for the delivery stream. More details are given belowvpcConfig
- (Optional) The VPC configuration for the delivery stream to connect to Elastic Search associated with the VPC. More details are given belowprocessingConfiguration
- (Optional) The data processing configuration. More details are given below.
The splunkConfiguration
object supports the following:
hecAcknowledgmentTimeout
- (Optional) The amount of time, in seconds between 180 and 600, that Kinesis Firehose waits to receive an acknowledgment from Splunk after it sends it data.hecEndpoint
- (Required) The HTTP Event Collector (HEC) endpoint to which Kinesis Firehose sends your data.hecEndpointType
- (Optional) The HEC endpoint type. Valid values areraw
orevent
. The default value israw
.hecToken
- (Required) The GUID that you obtain from your Splunk cluster when you create a new HEC endpoint.s3BackupMode
- (Optional) Defines how documents should be delivered to Amazon S3. Valid values arefailedEventsOnly
andallEvents
. Default value isfailedEventsOnly
.retryDuration
- (Optional) After an initial failure to deliver to Splunk, the total amount of time, in seconds between 0 to 7200, during which Firehose re-attempts delivery (including the first attempt). After this time has elapsed, the failed documents are written to Amazon S3. The default value is 300s. There will be no retry if the value is 0.cloudwatchLoggingOptions
- (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below.processingConfiguration
- (Optional) The data processing configuration. More details are given below.
The httpEndpointConfiguration
object supports the following:
url
- (Required) The HTTP endpoint URL to which Kinesis Firehose sends your data.name
- (Optional) The HTTP endpoint name.accessKey
- (Optional) The access key required for Kinesis Firehose to authenticate with the HTTP endpoint selected as the destination.roleArn
- (Required) Kinesis Data Firehose uses this IAM role for all the permissions that the delivery stream needs. The pattern needs to bearn:.*
.s3BackupMode
- (Optional) Defines how documents should be delivered to Amazon S3. Valid values arefailedDataOnly
andallData
. Default value isfailedDataOnly
.bufferingSize
- (Optional) Buffer incoming data to the specified size, in MBs, before delivering it to the destination. The default value is 5.bufferingInterval
- (Optional) Buffer incoming data for the specified period of time, in seconds, before delivering it to the destination. The default value is 300 (5 minutes).cloudwatchLoggingOptions
- (Optional) The CloudWatch Logging Options for the delivery stream. More details are given below.processingConfiguration
- (Optional) The data processing configuration. More details are given below.requestConfiguration
- (Optional) The request configuration. More details are given below.retryDuration
- (Optional) Total amount of seconds Firehose spends on retries. This duration starts after the initial attempt fails; it does not include the time periods during which Firehose waits for acknowledgment from the specified destination after each attempt. Valid values between0
and7200
. Default is300
.
The cloudwatchLoggingOptions
object supports the following:
enabled
- (Optional) Enables or disables the logging. Defaults tofalse
.logGroupName
- (Optional) The CloudWatch group name for logging. This value is required ifenabled
is true.logStreamName
- (Optional) The CloudWatch log stream name for logging. This value is required ifenabled
is true.
The processingConfiguration
object supports the following:
enabled
- (Optional) Enables or disables data processing.processors
- (Optional) Array of data processors. More details are given below
The processors
array objects support the following:
type
- (Required) The type of processor. Valid Values:recordDeAggregation
,lambda
,metadataExtraction
,appendDelimiterToRecord
. Validation is done against AWS SDK constants; so that values not explicitly listed may also work.parameters
- (Optional) Array of processor parameters. More details are given below
The parameters
array objects support the following:
parameterName
- (Required) Parameter name. Valid Values:lambdaArn
,numberOfRetries
,metadataExtractionQuery
,jsonParsingEngine
,roleArn
,bufferSizeInMBs
,bufferIntervalInSeconds
,subRecordType
,delimiter
. Validation is done against AWS SDK constants; so that values not explicitly listed may also work.parameterValue
- (Required) Parameter value. Must be between 1 and 512 length (inclusive). When providing a Lambda ARN, you should specify the resource version as well.
~> NOTE: Parameters with default values, including numberOfRetries
(default: 3), roleArn
(default: firehose role ARN), bufferSizeInMBs
(default: 3), and bufferIntervalInSeconds
(default: 60), are not stored in terraform state. To prevent perpetual differences, it is therefore recommended to only include parameters with non-default values.
The requestConfiguration
object supports the following:
contentEncoding
- (Optional) Kinesis Data Firehose uses the content encoding to compress the body of a request before sending the request to the destination. Valid values arenone
andgzip
. Default value isnone
.commonAttributes
- (Optional) Describes the metadata sent to the HTTP endpoint destination. More details are given below
The commonAttributes
array objects support the following:
name
- (Required) The name of the HTTP endpoint common attribute.value
- (Optional) The value of the HTTP endpoint common attribute.
The vpcConfig
object supports the following:
subnetIds
- (Required) A list of subnet IDs to associate with Kinesis Firehose.securityGroupIds
- (Required) A list of security group IDs to associate with Kinesis Firehose.roleArn
- (Required) The ARN of the IAM role to be assumed by Firehose for calling the Amazon EC2 configuration API and for creating network interfaces. Make sure role has necessary IAM permissions
dataFormatConversionConfiguration
~> NOTE: Once configured, the data format conversion configuration can only be disabled, in which the configuration values will remain, but will not be active. It is not currently possible to completely remove the configuration without recreating the resource.
Example:
/*Provider bindings are generated by running cdktf get.
See https://cdk.tf/provider-generation for more details.*/
import * as aws from "./.gen/providers/aws";
// Example: enabling data format conversion (incoming JSON records are
// converted to ORC before delivery to S3).
// NOTE(review): partial example — other required arguments (e.g. the
// destination and the rest of extendedS3Configuration such as bucketArn
// and roleArn) are omitted for brevity; confirm against a full example.
new aws.kinesisFirehoseDeliveryStream.KinesisFirehoseDeliveryStream(
  this,
  "example",
  {
    extendedS3Configuration: {
      bufferSize: 128, // presumably MB (delivery buffer size) — TODO confirm units
      dataFormatConversionConfiguration: {
        // Input side: parse incoming records as JSON via the native
        // Hive/HCatalog JsonSerDe. The empty object selects the
        // deserializer with its default settings.
        inputFormatConfiguration: {
          deserializer: {
            hiveJsonSerDe: {},
          },
        },
        // Output side: serialize records to the ORC format. The empty
        // object selects the serializer with its default settings.
        outputFormatConfiguration: {
          serializer: {
            orcSerDe: {},
          },
        },
        // Column information for the conversion is read from an AWS Glue
        // Data Catalog table, accessed through the given IAM role.
        schemaConfiguration: {
          databaseName: "${aws_glue_catalog_table.example.database_name}",
          roleArn: "${aws_iam_role.example.arn}",
          tableName: "${aws_glue_catalog_table.example.name}",
        },
      },
    },
  }
);
`inputFormatConfiguration` - (Required) Nested argument that specifies the deserializer that you want Kinesis Data Firehose to use to convert the format of your data from JSON. More details below.
`outputFormatConfiguration` - (Required) Nested argument that specifies the serializer that you want Kinesis Data Firehose to use to convert the format of your data to the Parquet or ORC format. More details below.
`schemaConfiguration` - (Required) Nested argument that specifies the AWS Glue Data Catalog table that contains the column information. More details below.
`enabled` - (Optional) Defaults to `true`. Set it to `false` if you want to disable format conversion while preserving the configuration details.
inputFormatConfiguration
deserializer
- (Required) Nested argument that specifies which deserializer to use. You can choose either the Apache Hive JSON SerDe or the OpenX JSON SerDe. More details below.
deserializer
\~> NOTE: One of the deserializers must be configured. If no nested configuration needs to occur simply declare as xxxJsonSerDe = []
or xxxJsonSerDe {}
.
`hiveJsonSerDe` - (Optional) Nested argument that specifies the native Hive / HCatalog JsonSerDe. More details below.
`openXJsonSerDe` - (Optional) Nested argument that specifies the OpenX SerDe. More details below.
hiveJsonSerDe
timestampFormats
- (Optional) A list of how you want Kinesis Data Firehose to parse the date and time stamps that may be present in your input data JSON. To specify these format strings, follow the pattern syntax of JodaTime's DateTimeFormat format strings. For more information, see Class DateTimeFormat. You can also use the special value millis to parse time stamps in epoch milliseconds. If you don't specify a format, Kinesis Data Firehose uses java.sql.Timestamp::valueOf by default.
openXJsonSerDe
`caseInsensitive` - (Optional) When set to `true`, which is the default, Kinesis Data Firehose converts JSON keys to lowercase before deserializing them.
`columnToJsonKeyMappings` - (Optional) A map of column names to JSON keys that aren't identical to the column names. This is useful when the JSON contains keys that are Hive keywords. For example, timestamp is a Hive keyword. If you have a JSON key named timestamp, set this parameter to `{ ts = "timestamp" }` to map this key to a column named ts.
`convertDotsInJsonKeysToUnderscores` - (Optional) When set to `true`, specifies that the names of the keys include dots and that you want Kinesis Data Firehose to replace them with underscores. This is useful because Apache Hive does not allow dots in column names. For example, if the JSON contains a key whose name is "a.b", you can define the column name to be "a_b" when using this option. Defaults to `false`.
outputFormatConfiguration
serializer
- (Required) Nested argument that specifies which serializer to use. You can choose either the ORC SerDe or the Parquet SerDe. More details below.
serializer
\~> NOTE: One of the serializers must be configured. If no nested configuration needs to occur simply declare as xxxSerDe = []
or xxxSerDe {}
.
`orcSerDe` - (Optional) Nested argument that specifies converting data to the ORC format before storing it in Amazon S3. For more information, see Apache ORC. More details below.
`parquetSerDe` - (Optional) Nested argument that specifies converting data to the Parquet format before storing it in Amazon S3. For more information, see Apache Parquet. More details below.
orcSerDe
`blockSizeBytes` - (Optional) The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 256 MiB and the minimum is 64 MiB. Kinesis Data Firehose uses this value for padding calculations.
`bloomFilterColumns` - (Optional) A list of column names for which you want Kinesis Data Firehose to create bloom filters.
`bloomFilterFalsePositiveProbability` - (Optional) The Bloom filter false positive probability (FPP). The lower the FPP, the bigger the Bloom filter. The default value is 0.05, the minimum is 0, and the maximum is 1.
`compression` - (Optional) The compression code to use over data blocks. The default is `snappy`.
`dictionaryKeyThreshold` - (Optional) A float that represents the fraction of the total number of non-null rows. To turn off dictionary encoding, set this fraction to a number that is less than the number of distinct keys in a dictionary. To always use dictionary encoding, set this threshold to 1.
`enablePadding` - (Optional) Set this to `true` to indicate that you want stripes to be padded to the HDFS block boundaries. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is `false`.
`formatVersion` - (Optional) The version of the file to write. The possible values are `V0_11` and `V0_12`. The default is `V0_12`.
`paddingTolerance` - (Optional) A float between 0 and 1 that defines the tolerance for block padding as a decimal fraction of stripe size. The default value is 0.05, which means 5 percent of stripe size. For the default values of 64 MiB ORC stripes and 256 MiB HDFS blocks, the default block padding tolerance of 5 percent reserves a maximum of 3.2 MiB for padding within the 256 MiB block. In such a case, if the available size within the block is more than 3.2 MiB, a new, smaller stripe is inserted to fit within that space. This ensures that no stripe crosses block boundaries and causes remote reads within a node-local task. Kinesis Data Firehose ignores this parameter when `enablePadding` is `false`.
`rowIndexStride` - (Optional) The number of rows between index entries. The default is 10000 and the minimum is 1000.
`stripeSizeBytes` - (Optional) The number of bytes in each stripe. The default is 64 MiB and the minimum is 8 MiB.
parquetSerDe
`blockSizeBytes` - (Optional) The Hadoop Distributed File System (HDFS) block size. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 256 MiB and the minimum is 64 MiB. Kinesis Data Firehose uses this value for padding calculations.
`compression` - (Optional) The compression code to use over data blocks. The possible values are `uncompressed`, `snappy`, and `gzip`, with the default being `snappy`. Use `snappy` for higher decompression speed. Use `gzip` if the compression ratio is more important than speed.
`enableDictionaryCompression` - (Optional) Indicates whether to enable dictionary compression.
`maxPaddingBytes` - (Optional) The maximum amount of padding to apply. This is useful if you intend to copy the data from Amazon S3 to HDFS before querying. The default is 0.
`pageSizeBytes` - (Optional) The Parquet page size. Column chunks are divided into pages. A page is conceptually an indivisible unit (in terms of compression and encoding). The minimum value is 64 KiB and the default is 1 MiB.
`writerVersion` - (Optional) Indicates the version of row format to output. The possible values are `v1` and `v2`. The default is `v1`.
schemaConfiguration
`databaseName` - (Required) Specifies the name of the AWS Glue database that contains the schema for the output data.
`roleArn` - (Required) The role that Kinesis Data Firehose can use to access AWS Glue. This role must be in the same account you use for Kinesis Data Firehose. Cross-account roles aren't allowed.
`tableName` - (Required) Specifies the AWS Glue table that contains the column information that constitutes your data schema.
`catalogId` - (Optional) The ID of the AWS Glue Data Catalog. If you don't supply this, the AWS account ID is used by default.
`region` - (Optional) If you don't specify an AWS Region, the default is the current region.
`versionId` - (Optional) Specifies the table version for the output data schema. Defaults to `latest`.
dynamicPartitioningConfiguration
Required when using dynamic partitioning.
`enabled` - (Optional) Enables or disables dynamic partitioning. Defaults to `false`.
`retryDuration` - (Optional) Total amount of seconds Firehose spends on retries. Valid values are between 0 and 7200. Default is 300.
\~> NOTE: You can enable dynamic partitioning only when you create a new delivery stream. Once you enable dynamic partitioning on a delivery stream, it cannot be disabled on this delivery stream. Therefore, Terraform will recreate the resource whenever dynamic partitioning is enabled or disabled.
Attributes Reference
In addition to all arguments above, the following attributes are exported:
`arn` - The Amazon Resource Name (ARN) specifying the Stream.
`tagsAll` - A map of tags assigned to the resource, including those inherited from the provider `defaultTags` configuration block.
Timeouts
`create` - (Default `30m`)
`update` - (Default `10m`)
`delete` - (Default `30m`)
Import
Kinesis Firehose Delivery streams can be imported using the stream ARN, e.g.,
$ terraform import aws_kinesis_firehose_delivery_stream.foo arn:aws:firehose:us-east-1:XXX:deliverystream/example
Note: Import does not work for the `s3` stream destination. Consider using `extendedS3` since the `s3` destination is deprecated.