IO Config
The io_config.yaml file contains the information and credentials required to access databases, data warehouses, and data lakes.
Here's a sample io_config.yaml file. You can learn more about variable interpolation and referencing secrets in our variables overview.
version: 0.1.1
default:
  # Default profile created for data IO access.
  # Add your credentials for the source you use, and delete the rest.
  # AWS
  AWS_ACCESS_KEY_ID: "{{ env_var('AWS_ACCESS_KEY_ID') }}"
  AWS_SECRET_ACCESS_KEY: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}"
  AWS_SESSION_TOKEN: session_token # Used to generate Redshift credentials
  AWS_REGION: region
  # Azure
  AZURE_CLIENT_ID: "{{ env_var('AZURE_CLIENT_ID') }}"
  AZURE_CLIENT_SECRET: "{{ env_var('AZURE_CLIENT_SECRET') }}"
  AZURE_STORAGE_ACCOUNT_NAME: "{{ env_var('AZURE_STORAGE_ACCOUNT_NAME') }}"
  AZURE_TENANT_ID: "{{ env_var('AZURE_TENANT_ID') }}"
  # ClickHouse
  CLICKHOUSE_DATABASE: default
  CLICKHOUSE_HOST: host.docker.internal
  CLICKHOUSE_INTERFACE: http
  CLICKHOUSE_PASSWORD: null
  CLICKHOUSE_PORT: 8123
  CLICKHOUSE_USERNAME: null
  # Druid
  DRUID_HOST: hostname
  DRUID_PASSWORD: password
  DRUID_PATH: /druid/v2/sql/
  DRUID_PORT: 8082
  DRUID_SCHEME: http
  DRUID_USER: user
  # Google
  GOOGLE_SERVICE_ACC_KEY:
    type: service_account
    project_id: project-id
    private_key_id: key-id
    private_key: "-----BEGIN PRIVATE KEY-----\nyour_private_key\n-----END PRIVATE KEY-----\n"
    client_email: your_service_account_email
    auth_uri: "https://accounts.google.com/o/oauth2/auth"
    token_uri: "https://accounts.google.com/o/oauth2/token"
    auth_provider_x509_cert_url: "https://www.googleapis.com/oauth2/v1/certs"
    client_x509_cert_url: "https://www.googleapis.com/robot/v1/metadata/x509/your_service_account_email"
  GOOGLE_SERVICE_ACC_KEY_FILEPATH: "/path/to/your/service/account/key.json"
  GOOGLE_LOCATION: US # Optional
  # MongoDB
  MONGODB_DATABASE: database
  MONGODB_HOST: host
  MONGODB_PASSWORD: password
  MONGODB_PORT: 27017
  MONGODB_COLLECTION: collection
  MONGODB_USER: user
  # MSSQL
  MSSQL_DATABASE: database
  MSSQL_SCHEMA: schema
  MSSQL_DRIVER: "ODBC Driver 18 for SQL Server"
  MSSQL_HOST: host
  MSSQL_PASSWORD: password
  MSSQL_PORT: 1433
  MSSQL_USER: SA
  # MySQL
  MYSQL_DATABASE: database
  MYSQL_HOST: host
  MYSQL_PASSWORD: password
  MYSQL_PORT: 3306
  MYSQL_USER: root
  # Pinot
  PINOT_HOST: hostname
  PINOT_PASSWORD: password
  PINOT_PATH: /query/sql
  PINOT_PORT: 8000
  PINOT_SCHEME: http
  PINOT_USER: user
  # PostgreSQL
  POSTGRES_CONNECT_TIMEOUT: 10
  POSTGRES_DBNAME: postgres
  POSTGRES_SCHEMA: public # Optional
  POSTGRES_USER: username
  POSTGRES_PASSWORD: password
  POSTGRES_HOST: hostname
  POSTGRES_PORT: 5432
  # Redshift
  REDSHIFT_SCHEMA: public # Optional
  REDSHIFT_DBNAME: redshift_db_name
  REDSHIFT_HOST: redshift_cluster_id.identifier.region.redshift.amazonaws.com
  REDSHIFT_PORT: 5439
  REDSHIFT_TEMP_CRED_USER: temp_username
  REDSHIFT_TEMP_CRED_PASSWORD: temp_password
  REDSHIFT_DBUSER: redshift_db_user
  REDSHIFT_CLUSTER_ID: redshift_cluster_id
  REDSHIFT_IAM_PROFILE: default
  # Snowflake
  SNOWFLAKE_USER: username
  SNOWFLAKE_PASSWORD: password
  SNOWFLAKE_ACCOUNT: account_id.region
  SNOWFLAKE_DEFAULT_WH: null # Optional default warehouse
  SNOWFLAKE_DEFAULT_DB: null # Optional default database
  SNOWFLAKE_DEFAULT_SCHEMA: null # Optional default schema
  SNOWFLAKE_PRIVATE_KEY_PASSPHRASE: null # Optional private key passphrase
  SNOWFLAKE_PRIVATE_KEY_PATH: null # Optional private key path
  SNOWFLAKE_ROLE: null # Optional role name
  SNOWFLAKE_TIMEOUT: null # Optional timeout in seconds
  # Trino
  trino:
    catalog: postgresql # Change this to the catalog of your choice
    host: 127.0.0.1
    http_headers:
      X-Something: 'mage=power'
    http_scheme: http
    password: mage1337 # Optional
    port: 8080
    schema: core_data
    session_properties: # Optional
      acc01.optimize_locality_enabled: false
      optimize_hash_generation: true
    source: trino-cli # Optional
    user: admin
    verify: /path/to/your/ca.crt # Optional
    data_type_properties:
      timestamp_precision: 3 # Optional. Set TIMESTAMP(n) precision
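
Blocks read these settings at runtime through Mage's IO clients. Below is a minimal sketch of how a Python data loader block typically consumes them, following the standard Postgres loader pattern; the query string is a placeholder, and 'default' refers to the profile name shown in the sample above.

from os import path

from mage_ai.data_preparation.repo_manager import get_repo_path
from mage_ai.io.config import ConfigFileLoader
from mage_ai.io.postgres import Postgres

if 'data_loader' not in globals():
    from mage_ai.data_preparation.decorators import data_loader


@data_loader
def load_data_from_postgres(*args, **kwargs):
    # Point the loader at io_config.yaml in the project root and pick a profile.
    config_path = path.join(get_repo_path(), 'io_config.yaml')
    config_profile = 'default'

    # Placeholder query; replace with your own SQL.
    query = 'SELECT 1'

    with Postgres.with_config(ConfigFileLoader(config_path, config_profile)) as loader:
        return loader.load(query)

The same pattern applies to the other connectors (for example, mage_ai.io.mysql.MySQL or mage_ai.io.snowflake.Snowflake); each client reads its own keys from the selected profile.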
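Exporting works the same way in reverse. The sketch below shows a data exporter block writing a DataFrame to BigQuery using the Google credentials from the profile (GOOGLE_SERVICE_ACC_KEY or GOOGLE_SERVICE_ACC_KEY_FILEPATH); the table ID is a placeholder.

from os import path

from mage_ai.data_preparation.repo_manager import get_repo_path
from mage_ai.io.bigquery import BigQuery
from mage_ai.io.config import ConfigFileLoader

if 'data_exporter' not in globals():
    from mage_ai.data_preparation.decorators import data_exporter


@data_exporter
def export_data_to_big_query(df, **kwargs) -> None:
    # Placeholder table ID; replace with your own project, dataset, and table.
    table_id = 'your-project.your_dataset.your_table_name'

    config_path = path.join(get_repo_path(), 'io_config.yaml')
    config_profile = 'default'

    BigQuery.with_config(ConfigFileLoader(config_path, config_profile)).export(
        df,
        table_id,
        if_exists='replace',  # Overwrite the table if it already exists.
    )

To target a different environment, you can add another top-level profile (for example, dev:) alongside default: in io_config.yaml and set config_profile to that profile name in your blocks.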