library(connector)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Make sure every directory used by the examples exists.
# showWarnings = FALSE keeps re-runs quiet when a directory is already there.
invisible(lapply(
  c("data", "raw", "processed", "output"),
  dir.create,
  showWarnings = FALSE
))
This vignette shows how to create and use connector objects directly in R code without YAML configuration files.
Creating Individual Connectors
You can create connector objects directly using the specific connector functions:
File System Connector
# Build a file system connector rooted at the "data" directory.
# Wrapping the assignment in parentheses also prints the new object.
(fs_conn <- connector_fs(path = "data"))
#> <ConnectorFS>
#> Inherits from: <Connector>
#> Registered methods:
#> • `create_directory_cnt.ConnectorFS()`
#> • `download_cnt.ConnectorFS()`
#> • `download_directory_cnt.ConnectorFS()`
#> • `list_content_cnt.ConnectorFS()`
#> • `log_read_connector.ConnectorFS()`
#> • `log_remove_connector.ConnectorFS()`
#> • `log_write_connector.ConnectorFS()`
#> • `read_cnt.ConnectorFS()`
#> • `remove_cnt.ConnectorFS()`
#> • `remove_directory_cnt.ConnectorFS()`
#> • `tbl_cnt.ConnectorFS()`
#> • `upload_cnt.ConnectorFS()`
#> • `upload_directory_cnt.ConnectorFS()`
#> • `write_cnt.ConnectorFS()`
#> Specifications:
#> • path: data
Database Connector
# Build a DBI connector on an in-memory SQLite database.
# The example only runs when the RSQLite package is installed.
if (requireNamespace("RSQLite", quietly = TRUE)) {
  db_conn <- connector_dbi(drv = RSQLite::SQLite(), dbname = ":memory:")
  db_conn
}
#> <ConnectorDBI>
#> Inherits from: <Connector>
#> Registered methods:
#> • `disconnect_cnt.ConnectorDBI()`
#> • `list_content_cnt.ConnectorDBI()`
#> • `log_read_connector.ConnectorDBI()`
#> • `log_remove_connector.ConnectorDBI()`
#> • `log_write_connector.ConnectorDBI()`
#> • `read_cnt.ConnectorDBI()`
#> • `remove_cnt.ConnectorDBI()`
#> • `tbl_cnt.ConnectorDBI()`
#> • `write_cnt.ConnectorDBI()`
#> Specifications:
#> • conn: <SQLiteConnection>
Using Individual Connectors
# Round-trip a small data frame through the file system connector
sample_data <- mtcars[seq_len(5), c("mpg", "cyl", "disp")]

# Store it as a CSV file, then check what the connector now holds
write_cnt(fs_conn, sample_data, "cars.csv")
list_content_cnt(fs_conn)
#> [1] "cars.csv"

# Read the file back through the same connector
retrieved_data <- read_cnt(fs_conn, "cars.csv")
#> Rows: 5 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (3): mpg, cyl, disp
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(retrieved_data)
#> # A tibble: 5 × 3
#> mpg cyl disp
#> <dbl> <dbl> <dbl>
#> 1 21 6 160
#> 2 21 6 160
#> 3 22.8 4 108
#> 4 21.4 6 258
#> 5 18.7 8 360
Creating Multiple Connectors with connectors()
The `connectors()` function lets you group multiple connector objects together:
# Group two file system connectors under the names "raw" and "processed"
my_connectors <- connectors(
  raw = connector_fs(path = "raw"),
  processed = connector_fs(path = "processed")
)

# Print the collection to see which connectors it holds
my_connectors
#> <connectors>
#> $raw <ConnectorFS>
#> $processed <ConnectorFS>
Working with Multiple Connectors
# Each connector in the collection serves a different stage of the workflow.
# Start from the first ten rows of iris.
iris_sample <- head(iris, 10)

# Stage 1: keep an untouched copy behind the "raw" connector
write_cnt(my_connectors$raw, iris_sample, "iris_raw.rds")

# Stage 2: compute the mean sepal length per species
processed <- iris_sample |>
  group_by(Species) |>
  summarise(mean_length = mean(Sepal.Length))

# Stage 3: store the summary behind the "processed" connector
write_cnt(my_connectors$processed, processed, "iris_summary.csv")

# Each connector only lists what was written through it
list_content_cnt(my_connectors$raw)
#> [1] "iris_raw.rds"
list_content_cnt(my_connectors$processed)
#> [1] "iris_summary.csv"
Mixed Storage Types
You can combine different types of connectors:
# RSQLite is an optional dependency: the example is skipped when it is missing.
if (requireNamespace("RSQLite", quietly = TRUE)) {
  # Mix file system and database connectors
  mixed_connectors <- connectors(
    files = connector_fs(path = "output"),
    database = connector_dbi(RSQLite::SQLite(), dbname = ":memory:")
  )

  # Same data, different storage
  test_data <- data.frame(x = 1:3, y = letters[1:3])
  mixed_connectors$files |> write_cnt(test_data, "test.csv")
  mixed_connectors$database |> write_cnt(test_data, "test_table")

  # List contents. Auto-printing only applies to top-level expressions, so
  # inside the `if` block each listing must be printed explicitly; otherwise
  # only the value of the last expression would be shown.
  print(mixed_connectors$files |> list_content_cnt())
  print(mixed_connectors$database |> list_content_cnt())
}
#> [1] "test.csv"
#> [1] "test_table"
Nested Connectors with nested_connectors()
You can group multiple `connectors()` objects together using `nested_connectors()`:
# One connectors() collection per project
project_a <- connectors(
  raw = connector_fs(path = "raw"),
  processed = connector_fs(path = "processed")
)

project_b <- connectors(
  data = connector_fs(path = "data"),
  output = connector_fs(path = "output")
)

# Combine both collections into a single nested structure, keyed by project
all_projects <- nested_connectors(
  project_a = project_a,
  project_b = project_b
)

# Print the nested structure to see the projects it holds
all_projects
#> <nested_connectors>
#> $project_a <connectors>
#> $project_b <connectors>
Now you can access connectors through the nested structure:
# Reach individual connectors as <nested>$<project>$<connector>
sample_data <- data.frame(x = 1:3, y = letters[1:3])

# Project A: write through its "raw" connector, then list the contents
write_cnt(all_projects$project_a$raw, sample_data, "sample.rds")
list_content_cnt(all_projects$project_a$raw)
#> [1] "iris_raw.rds" "sample.rds"

# Project B: write through its "data" connector, then list the contents
write_cnt(all_projects$project_b$data, sample_data, "input.csv")
list_content_cnt(all_projects$project_b$data)
#> [1] "cars.csv" "input.csv"
Benefits of Using connectors()
- Organization: Group related storage locations together
- Flexibility: Mix different storage types (files, databases)
- Consistency: Same interface for all storage types
- Clarity: Named connectors make code more readable
Summary
- Use `connector_fs()` and `connector_dbi()` to create individual connectors
- Use `connectors()` to group multiple connectors together
- Access individual connectors by name: `my_connectors$name`
- All connectors use the same functions: `write_cnt()`, `read_cnt()`, `list_content_cnt()`, `remove_cnt()`