Datapackage from Dataverse
ODAM: Datapackage from a Dataverse repository
Example of an R session showing how to retrieve an ODAM datapackage by keyword from Data INRAE [1], a Dataverse repository.
[1] Institut National de Recherche pour l'Agriculture, l'Alimentation et l'Environnement. (2018). Data INRAE. https://doi.org/10.14758/9T8G-WJ20
Introduction to Dataverse with R
# Packages required by this session; any that are not installed yet are
# installed from CRAN before they are loaded.
packages <- c("httr", "jsonlite", "jsonvalidate", "dataverse")
# Scan the installed-package table once and reuse the result.
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  # Use the HTTPS mirror URL so packages are not fetched over plain HTTP.
  install.packages(missing_packages, repos = "https://cran.rstudio.com")
}
library(dataverse)
library(httr)
library(jsonvalidate)
library(jsonlite)
# ---- User parameters ----
# Keyword used to search the repository for datasets.
dataset_keyword <- "tomato"
# Host name of the Dataverse installation to query.
dataverse_server <- "data.inra.fr"
# URL of the JSON schema the datapackage is validated against.
url_schema <- "https://inrae.github.io/ODAM/json-schema/odam-data-package.json"

# Publish the server through the DATAVERSE_SERVER environment variable; the
# dataverse calls made later in this script pass no explicit server argument.
Sys.setenv(DATAVERSE_SERVER = dataverse_server)
# Data search: look up the keyword, restricted to results of type "dataset".
# `type` is passed by name: in dataverse_search() it follows `...`, so a second
# positional argument would be treated as part of the query text instead.
ds <- dataverse::dataverse_search(dataset_keyword, type = "dataset")

# Stop when the search returned nothing. NROW() also copes with a result that
# has no dimensions (e.g. an empty list), where dim(ds)[1] would be NULL.
if (NROW(ds) == 0) {
  stop("No dataset found")
}

# Stop when none of the hits is a dataset. A comparison that yields NA
# (missing type) is treated as "not a dataset".
is_dataset <- ds$type == "dataset"
if (!any(is_dataset, na.rm = TRUE)) {
  stop("No dataset found with the right type")
}

# Keep only the rows describing datasets. which() drops NA comparisons and
# drop = FALSE keeps `ds` a data frame whatever its number of columns.
ds <- ds[which(is_dataset), , drop = FALSE]
# Search for 'datapackage.json': collect the global ids of all datasets whose
# file listing contains a file with that label.
#
# The listing is requested once per dataset, and labels are read straight from
# each file entry, so the test does not depend on all entries of a dataset
# having the same number of fields.
has_datapackage <- vapply(
  as.character(ds$global_id),
  function(dataset_id) {
    files <- dataverse::dataset_files(dataset_id)
    if (length(files) == 0) {
      return(FALSE)
    }
    labels <- vapply(
      files,
      function(entry) {
        label <- entry[["label"]]
        # An entry without a label can never match.
        if (is.null(label)) NA_character_ else as.character(label)[1]
      },
      character(1)
    )
    "datapackage.json" %in% labels
  },
  logical(1),
  USE.NAMES = FALSE
)
ids <- as.character(ds$global_id)[has_datapackage]

# Check that the search was successful.
if (length(ids) == 0) {
  stop("No datapackage found")
}
# If several datasets matched, take the first one.
id <- ids[1]
cat(" Dataset found : ",id,"\n")

# File listing of the selected dataset as a data frame with one row per file
# and one (list) column per metadata field; `dflist` and `idx` are reused
# further down to display the file's metadata.
dflist <- as.data.frame(t(simplify2array(dataverse::dataset_files(id))))

# Locate the row of the 'datapackage.json' file. The labels must be on the
# left-hand side of %in% to obtain one logical per row; with the operands
# swapped the result is a single TRUE and which() always returns 1.
idx <- which(dflist$label %in% "datapackage.json")[1]
if (is.na(idx)) {
  stop("No datapackage found")
}
file_id <- dflist$dataFile[[idx]]$id

# Download the file content and check it *before* converting it to text:
# a NULL check placed after rawToChar() can never be TRUE.
dp_raw <- dataverse::get_file(file_id)
if (is.null(dp_raw) || length(dp_raw) == 0) {
  stop("the datapackage returned as null")
}
dp_json <- rawToChar(dp_raw)
# Download the ODAM data package JSON schema. The request is made with the
# curl options sslversion = 6 and ssl_verifypeer = 1.
response <- httr::GET(
  url_schema,
  httr::config(sslversion = 6, ssl_verifypeer = 1)
)
if (httr::status_code(response) != 200) {
  stop("Error while getting the ODAM data package schema")
}
schema <- httr::content(response, as = "text")

# Abort unless the downloaded datapackage conforms to the schema.
is_valid <- jsonvalidate::json_validate(dp_json, schema)
if (!is_valid) {
  stop("the returned datapackage is not a valid ODAM datapackage")
}
# Parse the datapackage JSON text into R objects.
dp <- jsonlite::fromJSON(dp_json)

# View metadata: for every column of the file listing (names as returned by
# ls()), show the value stored at row `idx`.
sapply(ls(dflist), function(field) dflist[[field]][[idx]])

# Do something with dp ...
# Here: show the name, title and cv_term of each resource.
resource_fields <- c("name", "title", "cv_term")
dp$resources[resource_fields]
$`categories`
[1] "data" "datapackage" "odam" "TSV"
$dataFile
$dataFile$`id`
[1] 100141
$dataFile$persistentId
[1] "doi:10.15454/95JUTK/SLKZUA"
$dataFile$pidURL
[1] "https://doi.org/10.15454/95JUTK/SLKZUA"
$dataFile$filename
[1] "datapackage.json"
$dataFile$contentType
[1] "application/json"
$dataFile$filesize
[1] 46674
$dataFile$description
[1] "ODAM datapackage based on JSON Schema"
$dataFile$storageIdentifier
[1] "s3://prod-datainra:17352703ae5-a43a037ccfaa"
$dataFile$rootDataFileId
[1] -1
$dataFile$md5
[1] "16fc7594899b640028b3d9c634e85d1e"
$dataFile$checksum
$dataFile$checksum$`type`
[1] "MD5"
$dataFile$checksum$value
[1] "16fc7594899b640028b3d9c634e85d1e"
$datasetVersionId
[1] 261953
$description
[1] "ODAM datapackage based on JSON Schema"
$label
[1] "datapackage.json"
$restricted
[1] FALSE
$version
[1] 1
name title cv_term.label cv_term.path
1 plants Plant features whole plant http://purl.obolibrary.org/obo/PO_0000003
2 samples Sample features organ harvesting http://purl.obolibrary.org/obo/OBI_1110046
3 aliquots Aliquots features organ harvesting http://purl.obolibrary.org/obo/OBI_1110046
4 cellwall_metabo Cell wall Compound quantifications chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
5 cellwall_metaboFW Cell Wall Compound quantifications (FW) chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
6 activome Activome Features chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
7 pools Pools of remaining pools organ harvesting http://purl.obolibrary.org/obo/OBI_1110046
8 qMS_metabo MS Compounds quantification chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
9 qNMR_metabo NMR Compounds quantification chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
10 plato_hexosesP Hexoses Phosphate chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
11 lipids_AG Lipids AG chemical entity http://purl.obolibrary.org/obo/CHEBI_24431
12 AminoAcid Amino Acids chemical entity http://purl.obolibrary.org/obo/CHEBI_24431