Skip to contents
library(connector)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Make sure every folder used by the examples exists. `showWarnings = FALSE`
# keeps reruns quiet when a folder is already present.
invisible(vapply(
  c("data", "raw", "processed", "output"),
  dir.create,
  logical(1),
  showWarnings = FALSE
))

This vignette shows how to create and use connector objects directly in R code without YAML configuration files.

Creating Individual Connectors

You can create connector objects directly by calling the constructor for the storage backend you need, such as connector_fs() for a folder or connector_dbi() for a database:

File System Connector

# Create a file system connector rooted at the "data" folder
# (the folder is created in the setup step at the top of this vignette)
fs_conn <- connector_fs(path = "data")
# Typing the object's name auto-prints its class, registered methods and path
fs_conn
#> <ConnectorFS>
#> Inherits from: <Connector>
#> Registered methods:
#>  `create_directory_cnt.ConnectorFS()`
#>  `download_cnt.ConnectorFS()`
#>  `download_directory_cnt.ConnectorFS()`
#>  `list_content_cnt.ConnectorFS()`
#>  `log_read_connector.ConnectorFS()`
#>  `log_remove_connector.ConnectorFS()`
#>  `log_write_connector.ConnectorFS()`
#>  `read_cnt.ConnectorFS()`
#>  `remove_cnt.ConnectorFS()`
#>  `remove_directory_cnt.ConnectorFS()`
#>  `tbl_cnt.ConnectorFS()`
#>  `upload_cnt.ConnectorFS()`
#>  `upload_directory_cnt.ConnectorFS()`
#>  `write_cnt.ConnectorFS()`
#> Specifications:
#>  path: data

Database Connector

# Create a database connector (using SQLite)
# RSQLite is optional: the guard skips this example when it is not installed.
if (requireNamespace("RSQLite", quietly = TRUE)) {
  # `drv` is the DBI driver; ":memory:" asks SQLite for an in-memory database,
  # so nothing is written to disk
  db_conn <- connector_dbi(
    drv = RSQLite::SQLite(),
    dbname = ":memory:"
  )
  # Last expression of the `if` body, so it is auto-printed at top level
  db_conn
}
#> <ConnectorDBI>
#> Inherits from: <Connector>
#> Registered methods:
#>  `disconnect_cnt.ConnectorDBI()`
#>  `list_content_cnt.ConnectorDBI()`
#>  `log_read_connector.ConnectorDBI()`
#>  `log_remove_connector.ConnectorDBI()`
#>  `log_write_connector.ConnectorDBI()`
#>  `read_cnt.ConnectorDBI()`
#>  `remove_cnt.ConnectorDBI()`
#>  `tbl_cnt.ConnectorDBI()`
#>  `write_cnt.ConnectorDBI()`
#> Specifications:
#>  conn: <SQLiteConnection>

Using Individual Connectors

# Round-trip a small slice of mtcars through the file system connector
sample_data <- mtcars[seq_len(5), c("mpg", "cyl", "disp")]

# The file extension ("csv") selects the on-disk format
write_cnt(fs_conn, sample_data, "cars.csv")
list_content_cnt(fs_conn)
#> [1] "cars.csv"

# Reading the file back yields a tibble; the column-spec message is printed
# by the CSV reader
retrieved_data <- read_cnt(fs_conn, "cars.csv")
#> Rows: 5 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (3): mpg, cyl, disp
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(retrieved_data)
#> # A tibble: 5 × 3
#>     mpg   cyl  disp
#>   <dbl> <dbl> <dbl>
#> 1  21       6   160
#> 2  21       6   160
#> 3  22.8     4   108
#> 4  21.4     6   258
#> 5  18.7     8   360

Creating Multiple Connectors with connectors()

The connectors() function lets you group multiple connector objects together:

# Create a collection of connectors
# Each argument name (`raw`, `processed`) becomes the `$` accessor for the
# connector it holds; both point at folders created in the setup step.
my_connectors <- connectors(
  raw = connector_fs(path = "raw"),
  processed = connector_fs(path = "processed")
)

# Auto-printing lists one line per named connector
my_connectors
#> <connectors>
#>   $raw <ConnectorFS>
#>   $processed <ConnectorFS>

Working with Multiple Connectors

# Route each stage of a small pipeline to its own connector
iris_sample <- head(iris, 10)

# Raw stage: persist the untouched rows
write_cnt(my_connectors$raw, iris_sample, "iris_raw.rds")

# Processing stage: mean sepal length per species
processed <- iris_sample |>
  group_by(Species) |>
  summarise(mean_length = mean(Sepal.Length))

# Processed stage: persist the summary table
write_cnt(my_connectors$processed, processed, "iris_summary.csv")

# Each connector lists only the files in its own folder
list_content_cnt(my_connectors$raw)
#> [1] "iris_raw.rds"
list_content_cnt(my_connectors$processed)
#> [1] "iris_summary.csv"

Mixed Storage Types

You can combine different types of connectors, such as a folder and a database, in a single connectors() collection:

# RSQLite is optional: the guard skips this example when it is not installed.
if (requireNamespace("RSQLite", quietly = TRUE)) {
  # Mix file system and database connectors in one collection
  mixed_connectors <- connectors(
    files = connector_fs(path = "output"),
    database = connector_dbi(drv = RSQLite::SQLite(), dbname = ":memory:")
  )

  # Same data, different storage
  test_data <- data.frame(x = 1:3, y = letters[1:3])

  mixed_connectors$files |> write_cnt(test_data, "test.csv")
  mixed_connectors$database |> write_cnt(test_data, "test_table")

  # List contents. Inside `if { }` only the value of the last expression is
  # auto-printed, so each listing is printed explicitly.
  mixed_connectors$files |> list_content_cnt() |> print()
  mixed_connectors$database |> list_content_cnt() |> print()

  # Close the SQLite connection now that the example is finished
  invisible(disconnect_cnt(mixed_connectors$database))
}
#> [1] "test.csv"
#> [1] "test_table"

Nested Connectors with nested_connectors()

You can group multiple connectors() objects together using nested_connectors():

# Create connectors for different projects
# project_a points at the "raw" and "processed" folders, which already hold
# files written in the earlier examples
project_a <- connectors(
  raw = connector_fs(path = "raw"),
  processed = connector_fs(path = "processed")
)

# project_b points at the "data" and "output" folders
project_b <- connectors(
  data = connector_fs(path = "data"),
  output = connector_fs(path = "output")
)

# Group them in a nested structure
# Each argument name becomes the `$` accessor for that project's collection
all_projects <- nested_connectors(
  project_a = project_a,
  project_b = project_b
)

# Auto-printing lists one line per nested collection
all_projects
#> <nested_connectors>
#>   $project_a <connectors>
#>   $project_b <connectors>

Individual connectors are now reached by chaining `$`: first the project name, then the connector name.

# Reach individual connectors by chaining `$` through the nested structure
sample_data <- data.frame(x = 1:3, y = letters[1:3])

# Project A: the "raw" folder already holds iris_raw.rds from an earlier example
write_cnt(all_projects$project_a$raw, sample_data, "sample.rds")
list_content_cnt(all_projects$project_a$raw)
#> [1] "iris_raw.rds" "sample.rds"

# Project B: the "data" folder already holds cars.csv from an earlier example
write_cnt(all_projects$project_b$data, sample_data, "input.csv")
list_content_cnt(all_projects$project_b$data)
#> [1] "cars.csv"  "input.csv"

Benefits of Using connectors()

  • Organization: Group related storage locations together
  • Flexibility: Mix different storage types (files, databases)
  • Consistency: Same interface for all storage types
  • Clarity: Named connectors make code more readable

Summary