CREATE EXTERNAL TABLE `<table name>`(
  -- External table over the Elastic Load Balancing access logs stored under LOCATION.
  -- Each log line is split into columns by the capture groups of 'input.regex', in order.
  `type` string COMMENT '',
  `time` string COMMENT '',
  `elb` string COMMENT '',
  `client_ip` string COMMENT '',
  `client_port` int COMMENT '',
  `target_ip` string COMMENT '',
  `target_port` int COMMENT '',
  `request_processing_time` double COMMENT '',
  `target_processing_time` double COMMENT '',
  `response_processing_time` double COMMENT '',
  `elb_status_code` string COMMENT '',
  `target_status_code` string COMMENT '',
  `received_bytes` bigint COMMENT '',
  `sent_bytes` bigint COMMENT '',
  `request_verb` string COMMENT '',
  `request_url` string COMMENT '',
  `request_proto` string COMMENT '',
  `user_agent` string COMMENT '',
  `ssl_cipher` string COMMENT '',
  `ssl_protocol` string COMMENT '',
  `target_group_arn` string COMMENT '',
  `trace_id` string COMMENT '',
  `domain_name` string COMMENT '',
  `chosen_cert_arn` string COMMENT '',
  `matched_rule_priority` string COMMENT '',
  `request_creation_time` string COMMENT '',
  `actions_executed` string COMMENT '',
  `redirect_url` string COMMENT '',
  `lambda_error_reason` string COMMENT '',
  `target_port_list` string COMMENT '',
  `target_status_code_list` string COMMENT '',
  `classification` string COMMENT '',
-  `classification_reason` string COMMENT ''
+  `classification_reason` string COMMENT '',
+  `traceability_id` string COMMENT '',
+  `unknown_fields` string COMMENT ''
)
PARTITIONED BY (
  `year` int COMMENT '',
  `month` int COMMENT '',
  `day` int COMMENT '')
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  -- RegexSerDe needs exactly one capture group per column (35 on the "+" side).
  -- The two trailing optional groups feed traceability_id and unknown_fields;
  -- "( .*)?" keeps any further fields appended to the log line in unknown_fields.
-  'input.regex' =
-        '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\"')
+  'input.regex' =
+        '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?( .*)?')
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://<s3 bucket>/<lb name>/AWSLogs/<aws account id>/elasticloadbalancing/ap-northeast-1'
TBLPROPERTIES (
  'classification'='csv',
  'compressionType'='gzip',
  'projection.day.digits'='2',
  'projection.day.range'='01,31',
  'projection.day.type'='integer',
  'projection.enabled'='true',
  'projection.month.digits'='2',
  'projection.month.range'='01,12',
  'projection.month.type'='integer',
  'projection.year.digits'='4',
  'projection.year.range'='2020,2100',
  'projection.year.type'='integer',
  'storage.location.template'='s3://<s3 bucket>/<lb name>/AWSLogs/<aws account id>/elasticloadbalancing/ap-northeast-1/${year}/${month}/${day}',
  'typeOfData'='file')

以上
参考になれば幸いです。

2023-12-07 AWS

AWS リソースの年間予約購入

年間予約購入まとめ
RI 期限切れ通知設定
Reserved Instance 購入前後のコスト削減額の計算
Savings Plans コスト削減額計算
- AWS Savings Plans コミット値推奨値を利用する場合
- AWS Savings Plans 自前でコミット値を設定する場合
CloudFront Security Bundle コスト削減額

AWS リソースの年間予約購入について備忘録です。

年間予約購入まとめ

2023-12-01 AWS

aws-cli で AWS 起動中のリソース一覧取得する

備忘録です。

複数の AWS アカウントで起動中のリソース一覧を作りたい時によく利用しています。

#!/bin/bash
#
# Prints one comma-separated row per running/available resource
# (EC2, RDS, ElastiCache, Redshift) for each AWS profile listed below.
#
# Row layout: profile,account_id,service_or_engine,instance_type,count,name
#
# Requires: awsume, aws-cli, jq.

profiles=(
  <profile names in ~/.aws/credentials>
)

for profile in "${profiles[@]}"; do
    # Assume the role and write the temporary credentials to the "tmp" profile.
    # On failure, skip this profile: "tmp" would presumably still hold the
    # previous account's credentials, and its resources would be reported
    # under the wrong profile name.
    if ! awsume "$profile" --session-name "kenzo.tanaka" --output-profile tmp; then
        echo "warning: awsume failed for profile '$profile'; skipping" >&2
        continue
    fi
    account_id=$(aws sts get-caller-identity --profile tmp --query 'Account' --output text)

    # Pass shell values to jq as variables instead of splicing them into the
    # jq program text, so unusual characters in a profile name cannot break it.
    jq_vars=(--arg profile "$profile" --arg account_id "$account_id")

    # EC2: running instances. An instance with no Tags, or without a Name tag,
    # is still listed (with an empty name) instead of being dropped or
    # aborting jq with an error.
    aws ec2 --profile tmp describe-instances --filters "Name=instance-state-name,Values=running" \
        | jq -r "${jq_vars[@]}" '
            .Reservations[].Instances[]
            | [$profile, $account_id, "ec2", .InstanceType, "1",
               ((.Tags // []) | map(select(.Key == "Name") | .Value) | (.[0] // ""))]
            | join(",")'

    # RDS: describe-db-clusters returns no instance information when no cluster
    # is in use, so describe-db-instances is queried instead.
    aws rds --profile tmp describe-db-instances \
        | jq -r "${jq_vars[@]}" '
            .DBInstances[]
            | select(.DBInstanceStatus == "available")
            | [$profile, $account_id, .Engine, .DBInstanceClass, "1", .DBInstanceIdentifier]
            | join(",")'

    # ElastiCache: one row per cache cluster, with its node count.
    aws elasticache --profile tmp describe-cache-clusters \
        | jq -r "${jq_vars[@]}" '
            .CacheClusters[]
            | [$profile, $account_id, .Engine, .CacheNodeType, (.NumCacheNodes | tostring), .CacheClusterId]
            | join(",")'

    # Redshift: available clusters, with their node count.
    aws redshift --profile tmp describe-clusters \
        | jq -r "${jq_vars[@]}" '
            .Clusters[]
            | select(.ClusterStatus == "available")
            | [$profile, $account_id, "redshift", .NodeType, (.NumberOfNodes | tostring), .ClusterIdentifier]
            | join(",")'
done

Kafka への流入量の試算

awsume と peco を使ってスイッチロールを簡単に

Confluent Cloud の料金比較：SaaS版 vs AWS Marketplace版

データ分析観点から見た AWS ECS コンテナロギング

ECS コンテナロギングの構成

ECS → CloudWatch Logs

RDS のテーブルデータを分析用テーブルにレプリケートする方法一覧

ToC

概要

前提

Glue Job Bookmark 機能でなく sampleQuery を使って DB データをエクスポートしてみた

ToC

概要

Glue Job Bookmark 機能とは？

Error: Docker is unreachable. Docker needs to be running to build inside a container.

ToC

ALB アクセスログに新たな項目が追加された

AWS リソースの年間予約購入

ToC

年間予約購入まとめ

aws-cli で AWS 起動中のリソース一覧取得する