<!DOCTYPE html>

00-exploration-of-data

00-exploration-of-data

TUTORIAL: The aims of this vignette is to learn how to request tables with dplyr package.

Installation

Install the present package and use its core functions

remotes::install_github('inrae/diades.atlas')

Packages

## 
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attachement du package : 'diades.atlas'
## L'objet suivant est masqué _par_ '.GlobalEnv':
## 
##     frontiers
# pkgload::load_all()

You can use this package without opening the Shiny application

Connect to the database

se <- new.env()

conn_eurodiad <- dbConnect(
  RPostgres::Postgres(), 
  dbname = 'eurodiad',
  host = '<your-host>',
  port = 5432,
  user = '<your-username>',
  password = rstudioapi::askForPassword("Database password"))

session$userData$con <-conn_eurodiad
# For ThinkR only
se <- new.env()
conn_eurodiad <- connect(session = se)
DBI::dbListTables(conn_eurodiad)
##  [1] "outlet_distance"          "hydiadparameter"          "basin_location"           "ices_rectangle2"         
##  [5] "ices_division"            "cices"                    "abundance"                "v_abundance"             
##  [9] "dbversion"                "v_iucn"                   "abundance_level"          "v_hybrid_model_mavg"     
## [13] "v_ecosystemic_services"   "v_occurence"              "v_basin"                  "v_species_ices_occurence"
## [17] "v_hybrid_model"           "species_iucn"             "ices_area"                "ices_rectangle"          
## [21] "iucn_classification"      "iucn_level"               "species_division"         "species_rectangle"       
## [25] "v_ices_geom"              "esvalue"                  "locale"                   "hybrid_model_result"     
## [29] "casestudy"                "ecosystemic_services"     "basin"                    "basin_outlet"            
## [33] "casestudy_basin"          "climatic_model"           "species"                  "category"                
## [37] "subcategory"              "v_maxvalue_es"            "lang85_85"                "ices_grass"

Explore data

It is recommended to use tbl() instead of dbGetQuery() to connect to a table in the database.
tbl() does not download the table, it only connect to the database and let it run the query. This means that if you run a filter() or a mutate() after that, the database executes it, not R, which is more efficient.
However, at some point, some of your {tidyverse} operations can not be realised or translated in SQL, hence, you need to download the resulting dataset using collect(). Then you can continue as usual.

For instance.

  • dbGetQuery() download data in R
    • filter() is executed by R in your R session
  • Note that !! is a specific to using {dplyr} with SQL there.
    • This is because the variable after it (e.g. species_id) is defined in the R session, but not in the database. Therefore, before sending the SQL query to the database, R has to transform the variable by its real value in R. Otherwise, it will send the word "species_id" which does not make sense for the SQL database, instead of the number you wanted to put.
# Create a function
get_data_dbi <- function(conn_eurodiad, species_id, scenario) {
  query <- "SELECT * FROM diadesatlas.v_hybrid_model"
  
  dbGetQuery(conn_eurodiad, query) %>%
    filter(species_id %in% !!species_id,
           climatic_scenario %in% !!scenario) 
}

# Use it
get_data_dbi(conn_eurodiad,
             species_id = c(6),
             scenario = 'rcp85')
##    hybrid_model_result_id basin_id species_id climatic_model_id year        nit        hsi saturation_rate
## 1                   60705        1          6                 2 1951 1176.10865 0.24424759     0.557657000
## 2                   60706        1          6                 2 1952 1143.58207 0.32226324     0.410966634
## 3                   60707        1          6                 2 1953 1119.68854 0.19459266     0.666378201
## 4                   60708        1          6                 2 1954 1096.77221 0.24571498     0.516933634
## 5                   60709        1          6                 2 1955  733.80042 0.08498220     1.000000000
## 6                   60710        1          6                 2 1956  997.76524 0.23510293     0.491496377
## 7                   60711        1          6                 2 1957  948.55155 0.32735810     0.335573647
## 8                   60712        1          6                 2 1958  911.75573 0.41504563     0.254409106
## 9                   60713        1          6                 2 1959  724.22078 0.24550794     0.341629559
## 10                  60714        1          6                 2 1960  669.87258 0.37964568     0.204344865
## 11                  60715        1          6                 2 1961  605.41363 0.27040872     0.259287450
## 12                  60716        1          6                 2 1962  684.41456 0.24577305     0.322503903
## 13                  60717        1          6                 2 1963  565.59345 0.22412295     0.292259095
## 14                  60718        1          6                 2 1964  449.55752 0.25041559     0.207909291
## 15                  60719        1          6                 2 1965  331.77212 0.32038514     0.119927153
## 16                  60720        1          6                 2 1966  317.39593 0.25260188     0.145517346
## 17                  60721        1          6                 2 1967  280.88310 0.36233798     0.089776310
## 18                  60722        1          6                 2 1968  229.64019 0.26426540     0.100636979
## 19                  60723        1          6                 2 1969  130.63829 0.32815258     0.046104642
## 20                  60724        1          6                 2 1970   77.45504 0.07997118     0.112167277
## 21                  60725        1          6                 2 1971   49.64420 0.32295007     0.017802589
## 22                  60726        1          6                 2 1972   36.16250 0.26076233     0.016060668
## 23                  60727        1          6                 2 1973   17.78938 0.32594177     0.006320782
## 24                  60728        1          6                 2 1974    5.98065 0.32919022     0.002104028
## 25                  60729        1          6                 2 1975    0.00000 0.33330411     0.000000000
## 26                  60730        1          6                 2 1976    0.00000 0.07650311     0.000000000
## 27                  60731        1          6                 2 1977    0.00000 0.12106809     0.000000000
## 28                  60732        1          6                 2 1978    0.00000 0.24494783     0.000000000
## 29                  60733        1          6                 2 1979    0.00000 0.26687631     0.000000000
## 30                  60734        1          6                 2 1980    0.00000 0.24788790     0.000000000
## 31                  60735        1          6                 2 1981    0.00000 0.25248367     0.000000000
## 32                  60736        1          6                 2 1982    0.00000 0.05853723     0.000000000
## 33                  60737        1          6                 2 1983    0.00000 0.43384268     0.000000000
## 34                  60738        1          6                 2 1984    0.00000 0.24605194     0.000000000
## 35                  60739        1          6                 2 1985    0.00000 0.32409335     0.000000000
## 36                  60740        1          6                 2 1986    0.00000 0.30049253     0.000000000
## 37                  60741        1          6                 2 1987    0.00000 0.31984017     0.000000000
## 38                  60742        1          6                 2 1988    0.00000 0.24944539     0.000000000
## 39                  60743        1          6                 2 1989    0.00000 0.26567151     0.000000000
## 40                  60744        1          6                 2 1990    0.00000 0.23825317     0.000000000
## 41                  60745        1          6                 2 1991    0.00000 0.32738422     0.000000000
## 42                  60746        1          6                 2 1992    0.00000 0.32538706     0.000000000
## 43                  60747        1          6                 2 1993    0.00000 0.31455448     0.000000000
## 44                  60748        1          6                 2 1994    0.00000 0.25445784     0.000000000
## 45                  60749        1          6                 2 1995    0.00000 0.32881698     0.000000000
## 46                  60750        1          6                 2 1996    0.00000 0.40264379     0.000000000
## 47                  60751        1          6                 2 1997    0.00000 0.32566102     0.000000000
## 48                  60752        1          6                 2 1998    0.00000 0.35783520     0.000000000
## 49                  60753        1          6                 2 1999    0.00000 0.37290455     0.000000000
## 50                  60754        1          6                 2 2000    0.00000 0.33744471     0.000000000
## 51                  60755        1          6                 2 2001    0.00000 0.26335707     0.000000000
## 52                  60756        1          6                 2 2002    0.00000 0.17640363     0.000000000
## 53                  60757        1          6                 2 2003    0.00000 0.35140068     0.000000000
## 54                  60758        1          6                 2 2004    0.00000 0.29239221     0.000000000
## 55                  60759        1          6                 2 2005    0.00000 0.18490081     0.000000000
## 56                  60760        1          6                 2 2006    0.00000 0.25068678     0.000000000
## 57                  60761        1          6                 2 2007    0.00000 0.25083582     0.000000000
## 58                  60762        1          6                 2 2008    0.00000 0.12043068     0.000000000
##    climatic_scenario climatic_model_code oceanic_model     atmospheric_model climatic_model_hm basin_name
## 1              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 2              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 3              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 4              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 5              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 6              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 7              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 8              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 9              rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 10             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 11             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 12             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 13             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 14             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 15             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 16             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 17             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 18             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 19             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 20             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 21             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 22             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 23             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 24             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 25             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 26             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 27             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 28             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 29             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 30             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 31             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 32             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 33             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 34             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 35             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 36             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 37             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 38             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 39             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 40             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 41             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 42             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 43             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 44             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 45             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 46             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 47             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 48             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 49             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 50             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 51             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 52             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 53             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 54             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 55             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 56             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 57             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
## 58             rcp85             cnrmcm5      CNRM-CM5 CNRM-CERFACS-CNRM-CM5                cn         Aa
##     fish_name  latin_name simplified_name
## 1  Allis shad Alosa alosa        A. alosa
## 2  Allis shad Alosa alosa        A. alosa
## 3  Allis shad Alosa alosa        A. alosa
## 4  Allis shad Alosa alosa        A. alosa
## 5  Allis shad Alosa alosa        A. alosa
## 6  Allis shad Alosa alosa        A. alosa
## 7  Allis shad Alosa alosa        A. alosa
## 8  Allis shad Alosa alosa        A. alosa
## 9  Allis shad Alosa alosa        A. alosa
## 10 Allis shad Alosa alosa        A. alosa
## 11 Allis shad Alosa alosa        A. alosa
## 12 Allis shad Alosa alosa        A. alosa
## 13 Allis shad Alosa alosa        A. alosa
## 14 Allis shad Alosa alosa        A. alosa
## 15 Allis shad Alosa alosa        A. alosa
## 16 Allis shad Alosa alosa        A. alosa
## 17 Allis shad Alosa alosa        A. alosa
## 18 Allis shad Alosa alosa        A. alosa
## 19 Allis shad Alosa alosa        A. alosa
## 20 Allis shad Alosa alosa        A. alosa
## 21 Allis shad Alosa alosa        A. alosa
## 22 Allis shad Alosa alosa        A. alosa
## 23 Allis shad Alosa alosa        A. alosa
## 24 Allis shad Alosa alosa        A. alosa
## 25 Allis shad Alosa alosa        A. alosa
## 26 Allis shad Alosa alosa        A. alosa
## 27 Allis shad Alosa alosa        A. alosa
## 28 Allis shad Alosa alosa        A. alosa
## 29 Allis shad Alosa alosa        A. alosa
## 30 Allis shad Alosa alosa        A. alosa
## 31 Allis shad Alosa alosa        A. alosa
## 32 Allis shad Alosa alosa        A. alosa
## 33 Allis shad Alosa alosa        A. alosa
## 34 Allis shad Alosa alosa        A. alosa
## 35 Allis shad Alosa alosa        A. alosa
## 36 Allis shad Alosa alosa        A. alosa
## 37 Allis shad Alosa alosa        A. alosa
## 38 Allis shad Alosa alosa        A. alosa
## 39 Allis shad Alosa alosa        A. alosa
## 40 Allis shad Alosa alosa        A. alosa
## 41 Allis shad Alosa alosa        A. alosa
## 42 Allis shad Alosa alosa        A. alosa
## 43 Allis shad Alosa alosa        A. alosa
## 44 Allis shad Alosa alosa        A. alosa
## 45 Allis shad Alosa alosa        A. alosa
## 46 Allis shad Alosa alosa        A. alosa
## 47 Allis shad Alosa alosa        A. alosa
## 48 Allis shad Alosa alosa        A. alosa
## 49 Allis shad Alosa alosa        A. alosa
## 50 Allis shad Alosa alosa        A. alosa
## 51 Allis shad Alosa alosa        A. alosa
## 52 Allis shad Alosa alosa        A. alosa
## 53 Allis shad Alosa alosa        A. alosa
## 54 Allis shad Alosa alosa        A. alosa
## 55 Allis shad Alosa alosa        A. alosa
## 56 Allis shad Alosa alosa        A. alosa
## 57 Allis shad Alosa alosa        A. alosa
## 58 Allis shad Alosa alosa        A. alosa
##  [ reached 'max' / getOption("max.print") -- omitted 60242 rows ]
  • tbl() only connects to the table, only a glimpse of the data is presented
get_data_tbl <- function(conn_eurodiad, species_id, scenario) {
  tbl(conn_eurodiad, "v_hybrid_model") %>% 
    filter(species_id %in% !!species_id,
           climatic_scenario %in% !!scenario) 
}

# Use it
get_data_tbl(conn_eurodiad,
             species_id = c(6),
             scenario = 'rcp85')
## # Source:   lazy query [?? x 17]
## # Database: postgres [ptlambert@citerne.bordeaux.irstea.priv:5432/eurodiad]
##    hybrid_model_result_id basin_id species_id climatic_model_id  year   nit    hsi saturation_rate climatic_scenar…
##                     <int>    <int>      <int>             <int> <int> <dbl>  <dbl>           <dbl> <chr>           
##  1                  60705        1          6                 2  1951 1176. 0.244            0.558 rcp85           
##  2                  60706        1          6                 2  1952 1144. 0.322            0.411 rcp85           
##  3                  60707        1          6                 2  1953 1120. 0.195            0.666 rcp85           
##  4                  60708        1          6                 2  1954 1097. 0.246            0.517 rcp85           
##  5                  60709        1          6                 2  1955  734. 0.0850           1     rcp85           
##  6                  60710        1          6                 2  1956  998. 0.235            0.491 rcp85           
##  7                  60711        1          6                 2  1957  949. 0.327            0.336 rcp85           
##  8                  60712        1          6                 2  1958  912. 0.415            0.254 rcp85           
##  9                  60713        1          6                 2  1959  724. 0.246            0.342 rcp85           
## 10                  60714        1          6                 2  1960  670. 0.380            0.204 rcp85           
## # … with more rows, and 8 more variables: climatic_model_code <chr>, oceanic_model <chr>,
## #   atmospheric_model <chr>, climatic_model_hm <chr>, basin_name <chr>, fish_name <chr>, latin_name <chr>,
## #   simplified_name <chr>
  • If you need to create some {ggplot2} figures after that, you can collect
get_data_tbl_collect <- function(conn_eurodiad, species_id, scenario) {
  tbl(conn_eurodiad, "v_hybrid_model") %>% 
    filter(species_id %in% !!species_id,
           climatic_scenario %in% !!scenario) %>% 
    # From collect(), the full data is in the R session
    collect()
}

# Use it
get_data_tbl_collect(conn_eurodiad,
                     species_id = c(6),
                     scenario = 'rcp85') %>% 
  filter(basin_id == 1) %>% 
  ggplot(aes(x = year)) +
  geom_line(aes(y = hsi))

  • Note that you can use a full SQL query with tbl()
get_data_tbl_query_collect <- function(conn_eurodiad, species_id, scenario) {
  query <- "SELECT * FROM diadesatlas.v_hybrid_model"
  
  tbl(conn_eurodiad, sql(query)) %>% 
    # Adds a filter to be run as SQL in the database
    filter(species_id %in% !!species_id,
           climatic_scenario %in% !!scenario) %>% 
    # From collect(), the full data is in the R session
    collect()
}

# Use it
get_data_tbl_query_collect(conn_eurodiad,
                           species_id = c(6),
                           scenario = 'rcp85') %>% 
  filter(basin_id == 1) %>% 
  ggplot(aes(x = year)) +
  geom_line(aes(y = hsi))

Get version

tbl(conn_eurodiad, "dbversion") %>% pull(dbversion_number)
## [1] "1.0.0" "1.0.1" "1.0.2"

Disconnect from the database

DBI::dbDisconnect(conn_eurodiad)