Title: | Read/Write Simple Feature Objects ('sf') with 'Apache' 'Arrow' |
---|---
Description: | Support for reading/writing simple feature ('sf') spatial objects from/to 'Parquet' files. 'Parquet' files are an open-source, column-oriented data storage format from Apache (<https://parquet.apache.org/>), now popular across programming languages. This implementation converts simple feature list geometries into well-known binary format for use by 'arrow', and coordinate reference system information is maintained in a standard metadata format. |
Authors: | Chris Jochem [aut, cre] |
Maintainer: | Chris Jochem <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.4.1 |
Built: | 2025-02-26 04:30:58 UTC |
Source: | https://github.com/wcjochem/sfarrow |
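For orientation, a minimal round trip between sf and Parquet might look like the sketch below; it assumes the package is installed and uses the North Carolina sample shipped with sf.

library(sfarrow)

# minimal sf -> Parquet -> sf round trip
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
tmp <- tempfile(fileext = ".parquet")
st_write_parquet(nc, tmp)
nc2 <- st_read_parquet(tmp)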
read_sf_dataset: Read an Arrow multi-file dataset and create sf object
Usage:

read_sf_dataset(dataset, find_geom = FALSE)
Arguments:

dataset | a Dataset object created by arrow::open_dataset, or an arrow_dplyr_query built by applying dplyr verbs to such a dataset |
find_geom | logical. Only needed when returning a subset of columns. Should all available geometry columns be selected and added to the dataset query without being named? Default is FALSE (see the sketch after the examples below) |
Details:

This function is primarily for use after opening a dataset with arrow::open_dataset. Users can then query the arrow Dataset using dplyr methods such as filter or select. Passing the resulting query to this function will parse the datasets and create an sf object. The function expects consistent geographic metadata to be stored with the dataset in order to create sf objects.
Value:

An object of class sf.
See also:

arrow::open_dataset, sf::st_read, st_read_parquet
Examples:

# read spatial object
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)

# create random grouping
nc$group <- sample(1:3, nrow(nc), replace = TRUE)

# use dplyr to group the dataset. %>% also allowed
nc_g <- dplyr::group_by(nc, group)

# write out to parquet datasets
tf <- tempfile()  # create temporary location
on.exit(unlink(tf))

# partitioning determined by dplyr 'group_vars'
write_sf_dataset(nc_g, path = tf)

list.files(tf, recursive = TRUE)

# open parquet files from dataset
ds <- arrow::open_dataset(tf)

# create a query. %>% also allowed
q <- dplyr::filter(ds, group == 1)

# read the dataset (piping syntax also works)
nc_d <- read_sf_dataset(dataset = q)

nc_d
plot(sf::st_geometry(nc_d))
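Building on the example above, which leaves a partitioned dataset in tf, the sketch below illustrates find_geom: a dplyr::select that names only attribute columns would drop the geometry, so find_geom = TRUE asks read_sf_dataset to add the geometry columns back to the query. The column NAME is assumed from the nc layer; treat this as illustrative rather than canonical usage.

ds <- arrow::open_dataset(tf)

# selecting only attribute columns would otherwise drop the geometry
q <- dplyr::select(ds, NAME, group)

# find_geom = TRUE adds the available geometry columns back to the query
nc_sub <- read_sf_dataset(q, find_geom = TRUE)
plot(sf::st_geometry(nc_sub))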
st_read_feather: Read a Feather file to sf object

Read a Feather file. Uses standard metadata information to identify geometry columns and coordinate reference system information.
Usage:

st_read_feather(dsn, col_select = NULL, ...)
Arguments:

dsn | character file path to a data source |
col_select | A character vector of column names to keep. Default is NULL, which keeps all columns (see the sketch after the examples below) |
... | additional parameters to pass to the underlying arrow reader |
Details:

Reference for the metadata used: https://github.com/geopandas/geo-arrow-spec. These are standard with the Python GeoPandas library.

Value:

An object of class sf.
Examples:

# load Natural Earth low-res dataset,
# created in Python with GeoPandas.to_feather()
path <- system.file("extdata", package = "sfarrow")
world <- st_read_feather(file.path(path, "world.feather"))

world
plot(sf::st_geometry(world))
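A short sketch of col_select, reading only a subset of columns. The column names name and continent are assumptions based on the Natural Earth low-res layer, and the geometry column is listed explicitly on the assumption that it must be kept for an sf object to be built.

# read only selected columns; column names are assumed from the layer
world_sub <- st_read_feather(file.path(path, "world.feather"),
                             col_select = c("name", "continent", "geometry"))
world_sub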
st_read_parquet: Read a Parquet file to sf object

Read a Parquet file. Uses standard metadata information to identify geometry columns and coordinate reference system information.
Usage:

st_read_parquet(dsn, col_select = NULL, props = NULL, ...)
Arguments:

dsn | character file path to a data source |
col_select | A character vector of column names to keep. Default is NULL, which keeps all columns |
props | Now deprecated in arrow::read_parquet |
... | additional parameters to pass to the underlying arrow reader |
Details:

Reference for the metadata used: https://github.com/geopandas/geo-arrow-spec. These are standard with the Python GeoPandas library.

Value:

An object of class sf.
Examples:

# load Natural Earth low-res dataset,
# created in Python with GeoPandas.to_parquet()
path <- system.file("extdata", package = "sfarrow")
world <- st_read_parquet(file.path(path, "world.parquet"))

world
plot(sf::st_geometry(world))
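As a quick follow-on to the details above: because the coordinate reference system travels in the file's geo metadata (per the spec referenced above), it should be restored on read, which this simply inspects.

# CRS information is recovered from the file's metadata
sf::st_crs(world)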
st_write_feather: Write sf object to Feather file

Convert a simple features spatial object from sf and write to a Feather file using write_feather. Geometry columns (type sfc) are converted to well-known binary (WKB) format.
Usage:

st_write_feather(obj, dsn, ...)
Arguments:

obj | object of class sf |
dsn | data source name. A path and file name with .feather extension |
... | additional options to pass to arrow::write_feather |
Value:

obj, invisibly.
Examples:

# read spatial object
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)

# create temp file
tf <- tempfile(fileext = ".feather")
on.exit(unlink(tf))

# write out object
st_write_feather(obj = nc, dsn = tf)

# In Python, read the new file with geopandas.read_feather(...)

# read back into R
nc_f <- st_read_feather(tf)
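A small sanity check on the WKB conversion described above, reusing nc and nc_f from the example; assuming no precision loss in the round trip, the geometries read back should match the originals.

# geometries should survive the sfc -> WKB -> sfc round trip
all.equal(sf::st_geometry(nc), sf::st_geometry(nc_f))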
st_write_parquet: Write sf object to Parquet file

Convert a simple features spatial object from sf and write to a Parquet file using write_parquet. Geometry columns (type sfc) are converted to well-known binary (WKB) format.
Usage:

st_write_parquet(obj, dsn, ...)
Arguments:

obj | object of class sf |
dsn | data source name. A path and file name with .parquet extension |
... | additional options to pass to arrow::write_parquet (see the sketch after the examples below) |
Value:

obj, invisibly.
Examples:

# read spatial object
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)

# create temp file
tf <- tempfile(fileext = ".parquet")
on.exit(unlink(tf))

# write out object
st_write_parquet(obj = nc, dsn = tf)

# In Python, read the new file with geopandas.read_parquet(...)

# read back into R
nc_p <- st_read_parquet(tf)
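Since ... is passed on to the underlying arrow writer, file-level options can be forwarded. A sketch, assuming arrow::write_parquet receives them (compression is a standard arrow::write_parquet argument):

tf2 <- tempfile(fileext = ".parquet")
on.exit(unlink(tf2), add = TRUE)

# forward a writer option through '...'
st_write_parquet(nc, tf2, compression = "snappy")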
write_sf_dataset: Write sf object to an Arrow multi-file dataset
Usage:

write_sf_dataset(
  obj,
  path,
  format = "parquet",
  partitioning = dplyr::group_vars(obj),
  ...
)
Arguments:

obj | object of class sf |
path | string path referencing a directory for the output |
format | output file format ("parquet" or "feather") |
partitioning | character vector of columns in obj used to define the dataset partitions. Defaults to the dplyr grouping variables, dplyr::group_vars(obj) (see the sketch after the examples below) |
... | additional arguments and options passed to arrow::write_dataset |
Details:

Translate an sf spatial object to a data.frame with WKB geometry columns and then write to an arrow dataset with partitioning. Allows for dplyr grouped datasets (using group_by) and uses those variables to define the partitions.
Value:

obj, invisibly.
See also:

arrow::write_dataset, st_read_parquet
Examples:

# read spatial object
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)

# create random grouping
nc$group <- sample(1:3, nrow(nc), replace = TRUE)

# use dplyr to group the dataset. %>% also allowed
nc_g <- dplyr::group_by(nc, group)

# write out to parquet datasets
tf <- tempfile()  # create temporary location
on.exit(unlink(tf))

# partitioning determined by dplyr 'group_vars'
write_sf_dataset(nc_g, path = tf)

list.files(tf, recursive = TRUE)

# open parquet files from dataset
ds <- arrow::open_dataset(tf)

# create a query. %>% also allowed
q <- dplyr::filter(ds, group == 1)

# read the dataset (piping syntax also works)
nc_d <- read_sf_dataset(dataset = q)

nc_d
plot(sf::st_geometry(nc_d))
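Grouping with dplyr is optional: the partition columns can also be named directly via partitioning, and format = "feather" switches the output format. A sketch reusing nc (with its group column) from the example above:

tf2 <- tempfile()
on.exit(unlink(tf2), add = TRUE)

# name the partition column directly and write Feather files
write_sf_dataset(nc, path = tf2, format = "feather", partitioning = "group")
list.files(tf2, recursive = TRUE)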