These are notes for the participatory live-coding portion of a workshop. Slides are here: https://cct-datascience.quarto.pub/demystifying-apis-slides/
library (httr2)
library (tidyverse) #used later on for wrangling results
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Search
Build the query
req_search <-
request ("https://commonchemistry.cas.org/api" ) |>
req_url_path_append ("search" ) |>
req_url_query (q = "caffeine" )
req_search
GET https://commonchemistry.cas.org/api/search?q=caffeine
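Get the results as a list
Here resp_search is the response returned by performing the request, i.e. resp_search <- req_search |> req_perform().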
caffeine_search <-
resp_search |> resp_body_json ()
caffeine_search
$count
[1] 1
$results
$results[[1]]
$results[[1]]$rn
[1] "58-08-2"
$results[[1]]$name
[1] "Caffeine"
$results[[1]]$image
[1] "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-rendering=\"auto\" stroke-width=\"1\" stroke-opacity=\"1\" stroke-miterlimit=\"10\" stroke-linejoin=\"miter\" stroke-linecap=\"square\" stroke-dashoffset=\"0\" stroke-dasharray=\"none\" stroke=\"black\" shape-rendering=\"auto\" image-rendering=\"auto\" height=\"152.4\" font-weight=\"normal\" font-style=\"normal\" font-size=\"12\" font-family=\"'Dialog'\" fill-opacity=\"1\" fill=\"black\" color-rendering=\"auto\" color-interpolation=\"auto\" xmlns=\"http://www.w3.org/2000/svg\"><g><g stroke=\"white\" fill=\"white\"><rect y=\"0\" x=\"0\" width=\"6663\" stroke=\"none\" height=\"5080\"/></g><g transform=\"translate(32866,32758)\" text-rendering=\"geometricPrecision\" stroke-width=\"44\" stroke-linejoin=\"round\" stroke-linecap=\"round\"><line y2=\"-29668\" y1=\"-30807\" x2=\"-29642\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29810\" y1=\"-30665\" x2=\"-29813\" x1=\"-29813\" fill=\"none\"/><line y2=\"-29099\" y1=\"-29668\" x2=\"-30625\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29668\" y1=\"-29668\" x2=\"-29642\" x1=\"-27860\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-29642\" x1=\"-30437\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-28842\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29562\" y1=\"-29099\" x2=\"-31425\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29888\" y1=\"-30807\" x2=\"-27672\" x1=\"-27672\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-30815\" x1=\"-31613\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-27672\" x1=\"-28467\" fill=\"none\"/><line y2=\"-30717\" y1=\"-31061\" x2=\"-27857\" x1=\"-28452\" fill=\"none\"/><line y2=\"-30807\" y1=\"-29888\" x2=\"-31613\" x1=\"-31613\" fill=\"none\"/><line y2=\"-29117\" y1=\"-29481\" x2=\"-27120\" x1=\"-27484\" fill=\"none\"/><line y2=\"-32202\" y1=\"-31595\" x2=\"-30625\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29313\" y1=\"-29562\" x2=\"-32231\" x1=\"-31800\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" 
x2=\"-30555\" x1=\"-30555\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" x2=\"-30697\" x1=\"-30697\" fill=\"none\"/><line y2=\"-31162\" y1=\"-30746\" x2=\"-32367\" x1=\"-31647\" fill=\"none\"/><line y2=\"-31283\" y1=\"-30867\" x2=\"-32297\" x1=\"-31578\" fill=\"none\"/><text y=\"-27874\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-28750\" xml:space=\"preserve\" x=\"-27083\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28660\" xml:space=\"preserve\" x=\"-26505\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-32242\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-32153\" xml:space=\"preserve\" x=\"-30200\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-28914\" xml:space=\"preserve\" x=\"-32762\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28825\" xml:space=\"preserve\" x=\"-32185\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-31150\" xml:space=\"preserve\" x=\"-32673\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-27822\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-28808\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-31766\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text></g></g></svg>"
There’s only one result here, so we’ll simplify this list a bit by just pulling out the first (only) element of the results.
caffeine <- caffeine_search[["results" ]][[1 ]]
caffeine
$rn
[1] "58-08-2"
$name
[1] "Caffeine"
$image
[1] "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-rendering=\"auto\" stroke-width=\"1\" stroke-opacity=\"1\" stroke-miterlimit=\"10\" stroke-linejoin=\"miter\" stroke-linecap=\"square\" stroke-dashoffset=\"0\" stroke-dasharray=\"none\" stroke=\"black\" shape-rendering=\"auto\" image-rendering=\"auto\" height=\"152.4\" font-weight=\"normal\" font-style=\"normal\" font-size=\"12\" font-family=\"'Dialog'\" fill-opacity=\"1\" fill=\"black\" color-rendering=\"auto\" color-interpolation=\"auto\" xmlns=\"http://www.w3.org/2000/svg\"><g><g stroke=\"white\" fill=\"white\"><rect y=\"0\" x=\"0\" width=\"6663\" stroke=\"none\" height=\"5080\"/></g><g transform=\"translate(32866,32758)\" text-rendering=\"geometricPrecision\" stroke-width=\"44\" stroke-linejoin=\"round\" stroke-linecap=\"round\"><line y2=\"-29668\" y1=\"-30807\" x2=\"-29642\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29810\" y1=\"-30665\" x2=\"-29813\" x1=\"-29813\" fill=\"none\"/><line y2=\"-29099\" y1=\"-29668\" x2=\"-30625\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29668\" y1=\"-29668\" x2=\"-29642\" x1=\"-27860\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-29642\" x1=\"-30437\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-28842\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29562\" y1=\"-29099\" x2=\"-31425\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29888\" y1=\"-30807\" x2=\"-27672\" x1=\"-27672\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-30815\" x1=\"-31613\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-27672\" x1=\"-28467\" fill=\"none\"/><line y2=\"-30717\" y1=\"-31061\" x2=\"-27857\" x1=\"-28452\" fill=\"none\"/><line y2=\"-30807\" y1=\"-29888\" x2=\"-31613\" x1=\"-31613\" fill=\"none\"/><line y2=\"-29117\" y1=\"-29481\" x2=\"-27120\" x1=\"-27484\" fill=\"none\"/><line y2=\"-32202\" y1=\"-31595\" x2=\"-30625\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29313\" y1=\"-29562\" x2=\"-32231\" x1=\"-31800\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" 
x2=\"-30555\" x1=\"-30555\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" x2=\"-30697\" x1=\"-30697\" fill=\"none\"/><line y2=\"-31162\" y1=\"-30746\" x2=\"-32367\" x1=\"-31647\" fill=\"none\"/><line y2=\"-31283\" y1=\"-30867\" x2=\"-32297\" x1=\"-31578\" fill=\"none\"/><text y=\"-27874\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-28750\" xml:space=\"preserve\" x=\"-27083\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28660\" xml:space=\"preserve\" x=\"-26505\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-32242\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-32153\" xml:space=\"preserve\" x=\"-30200\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-28914\" xml:space=\"preserve\" x=\"-32762\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28825\" xml:space=\"preserve\" x=\"-32185\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-31150\" xml:space=\"preserve\" x=\"-32673\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-27822\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-28808\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-31766\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text></g></g></svg>"
Convert to data frame
We can convert this to a data frame or tibble.
# A tibble: 1 × 3
rn name image
<chr> <chr> <chr>
1 58-08-2 Caffeine "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-render…
Detail
Now let’s get more information with the /detail
endpoint
Build the query
We can pass the registry number directly from our first query into this one.
req_detail <-
request ("https://commonchemistry.cas.org/api" ) |>
req_url_path_append ("detail" ) |>
req_url_query (cas_rn = caffeine$ rn)
req_detail
GET https://commonchemistry.cas.org/api/detail?cas_rn=58-08-2
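Get the results
Here resp_detail is the response returned by performing the request, i.e. resp_detail <- req_detail |> req_perform().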
caffeine_detail <-
resp_detail |>
resp_body_json ()
caffeine_detail
$uri
[1] "substance/pt/58082"
$rn
[1] "58-08-2"
$name
[1] "Caffeine"
$image
[1] "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-rendering=\"auto\" stroke-width=\"1\" stroke-opacity=\"1\" stroke-miterlimit=\"10\" stroke-linejoin=\"miter\" stroke-linecap=\"square\" stroke-dashoffset=\"0\" stroke-dasharray=\"none\" stroke=\"black\" shape-rendering=\"auto\" image-rendering=\"auto\" height=\"152.4\" font-weight=\"normal\" font-style=\"normal\" font-size=\"12\" font-family=\"'Dialog'\" fill-opacity=\"1\" fill=\"black\" color-rendering=\"auto\" color-interpolation=\"auto\" xmlns=\"http://www.w3.org/2000/svg\"><g><g stroke=\"white\" fill=\"white\"><rect y=\"0\" x=\"0\" width=\"6663\" stroke=\"none\" height=\"5080\"/></g><g transform=\"translate(32866,32758)\" text-rendering=\"geometricPrecision\" stroke-width=\"44\" stroke-linejoin=\"round\" stroke-linecap=\"round\"><line y2=\"-29668\" y1=\"-30807\" x2=\"-29642\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29810\" y1=\"-30665\" x2=\"-29813\" x1=\"-29813\" fill=\"none\"/><line y2=\"-29099\" y1=\"-29668\" x2=\"-30625\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29668\" y1=\"-29668\" x2=\"-29642\" x1=\"-27860\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-29642\" x1=\"-30437\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-28842\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29562\" y1=\"-29099\" x2=\"-31425\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29888\" y1=\"-30807\" x2=\"-27672\" x1=\"-27672\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-30815\" x1=\"-31613\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-27672\" x1=\"-28467\" fill=\"none\"/><line y2=\"-30717\" y1=\"-31061\" x2=\"-27857\" x1=\"-28452\" fill=\"none\"/><line y2=\"-30807\" y1=\"-29888\" x2=\"-31613\" x1=\"-31613\" fill=\"none\"/><line y2=\"-29117\" y1=\"-29481\" x2=\"-27120\" x1=\"-27484\" fill=\"none\"/><line y2=\"-32202\" y1=\"-31595\" x2=\"-30625\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29313\" y1=\"-29562\" x2=\"-32231\" x1=\"-31800\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" 
x2=\"-30555\" x1=\"-30555\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" x2=\"-30697\" x1=\"-30697\" fill=\"none\"/><line y2=\"-31162\" y1=\"-30746\" x2=\"-32367\" x1=\"-31647\" fill=\"none\"/><line y2=\"-31283\" y1=\"-30867\" x2=\"-32297\" x1=\"-31578\" fill=\"none\"/><text y=\"-27874\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-28750\" xml:space=\"preserve\" x=\"-27083\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28660\" xml:space=\"preserve\" x=\"-26505\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-32242\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-32153\" xml:space=\"preserve\" x=\"-30200\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-28914\" xml:space=\"preserve\" x=\"-32762\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28825\" xml:space=\"preserve\" x=\"-32185\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-31150\" xml:space=\"preserve\" x=\"-32673\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-27822\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-28808\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-31766\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text></g></g></svg>"
$inchi
[1] "InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3"
$inchiKey
[1] "InChIKey=RYYVLZVUVIJVGH-UHFFFAOYSA-N"
$smile
[1] "O=C1C2=C(N(C)C(=O)N1C)N=CN2C"
$canonicalSmile
[1] "O=C1C2=C(N=CN2C)N(C(=O)N1C)C"
$molecularFormula
[1] "C<sub>8</sub>H<sub>10</sub>N<sub>4</sub>O<sub>2</sub>"
$molecularMass
[1] "194.19"
$experimentalProperties
$experimentalProperties[[1]]
$experimentalProperties[[1]]$name
[1] "Melting Point"
$experimentalProperties[[1]]$property
[1] "238 °C"
$experimentalProperties[[1]]$sourceNumber
[1] 1
$experimentalProperties[[2]]
$experimentalProperties[[2]]$name
[1] "Density"
$experimentalProperties[[2]]$property
[1] "1.23 g/cm³ @ Temp: 18 °C"
$experimentalProperties[[2]]$sourceNumber
[1] 2
$propertyCitations
$propertyCitations[[1]]
$propertyCitations[[1]]$docUri
[1] ""
$propertyCitations[[1]]$sourceNumber
[1] 1
$propertyCitations[[1]]$source
[1] "International Chemical Safety Cards data were obtained from the National Institute for Occupational Safety and Health (US)"
$propertyCitations[[2]]
$propertyCitations[[2]]$docUri
[1] ""
$propertyCitations[[2]]$sourceNumber
[1] 2
$propertyCitations[[2]]$source
[1] "Hazardous Substances Data Bank data were obtained from the National Library of Medicine (US)"
$synonyms
$synonyms[[1]]
[1] "1<em>H</em>-Purine-2,6-dione, 3,7-dihydro-1,3,7-trimethyl-"
$synonyms[[2]]
[1] "Caffeine"
$synonyms[[3]]
[1] "3,7-Dihydro-1,3,7-trimethyl-1<em>H</em>-purine-2,6-dione"
$synonyms[[4]]
[1] "Guaranine"
$synonyms[[5]]
[1] "Methyltheobromine"
$synonyms[[6]]
[1] "No-Doz"
$synonyms[[7]]
[1] "Thein"
$synonyms[[8]]
[1] "Theine"
$synonyms[[9]]
[1] "1,3,7-Trimethyl-2,6-dioxopurine"
$synonyms[[10]]
[1] "1,3,7-Trimethylxanthine"
$synonyms[[11]]
[1] "Caffein"
$synonyms[[12]]
[1] "Cafipel"
$synonyms[[13]]
[1] "Alert-Pep"
$synonyms[[14]]
[1] "Koffein"
$synonyms[[15]]
[1] "Cafeina"
$synonyms[[16]]
[1] "Mateina"
$synonyms[[17]]
[1] "Refresh'n"
$synonyms[[18]]
[1] "Stim"
$synonyms[[19]]
[1] "Tri-Aqua"
$synonyms[[20]]
[1] "7-Methyltheophylline"
$synonyms[[21]]
[1] "Shape Plus"
$synonyms[[22]]
[1] "Caffedrine"
$synonyms[[23]]
[1] "Diurex"
$synonyms[[24]]
[1] "Stay Alert"
$synonyms[[25]]
[1] "Synalgos"
$synonyms[[26]]
[1] "Miudol"
$synonyms[[27]]
[1] "Wigraine"
$synonyms[[28]]
[1] "Dasin"
$synonyms[[29]]
[1] "Phensal"
$synonyms[[30]]
[1] "DHCplus"
$synonyms[[31]]
[1] "NSC 5036"
$synonyms[[32]]
[1] "Durvitan"
$synonyms[[33]]
[1] "Cafalgine"
$synonyms[[34]]
[1] "Midron extra"
$synonyms[[35]]
[1] "New Cetamol"
$synonyms[[36]]
[1] "Asia migrine"
$synonyms[[37]]
[1] "Palergot-C"
$synonyms[[38]]
[1] "Vivarin"
$synonyms[[39]]
[1] "1,3,7-Trimethyl-7<em>H</em>-purine-2,6-dione"
$synonyms[[40]]
[1] "1,3,7-Trimethyl-2,3,6,7-tetrahydro-1<em>H</em>-purine-2,6-dione"
$synonyms[[41]]
[1] "1,3,7-Trimethyl-3,7-dihydro-purine-2,6-dione"
$synonyms[[42]]
[1] "MeSH ID: D002110"
$replacedRns
$replacedRns[[1]]
[1] "95789-13-2"
$replacedRns[[2]]
[1] "71701-02-5"
$hasMolfile
[1] TRUE
Wrangle the results
Lists are hard to work with! Let’s try to turn this into a tibble.
as_tibble (caffeine_detail)
Error in `recycle_columns()`:
! Tibble columns must have compatible sizes.
• Size 2: Columns `experimentalProperties`, `propertyCitations`, and
`replacedRns`.
• Size 42: Column `synonyms`.
ℹ Only values of size one are recycled.
The list isn’t a nice rectangle, so it can’t be easily converted into a tibble.
We can take subsets of the list where all elements are the same length and make those into data frames
One way to “drill down” into lists is with purrr::pluck()
caffeine_detail |> pluck ("name" )
caffeine_detail |> pluck (1 )
caffeine_detail |> pluck ("experimentalProperties" , 1 , "property" )
We can instead use purrr::keep()
to supply a function to determine which elements to extract. For example, let’s take all the elements with length == 1 and turn those into a data frame.
caffeine_basics <- caffeine_detail |>
purrr:: keep (\(x) length (x) == 1 ) |>
as_tibble ()
This uses an anonymous function, \(x) length(x) == 1
, which is saying “for list element x
, is the length of x
1?”. keep()
iterates like a for loop where x
is representing caffeine_detail[[1]]
through caffeine_detail[[15]]
.
Now that we have a data frame, we can make a table with gt
that displays the SVG data as an image of the molecule!
library (gt)
gt (caffeine_basics) |>
fmt_markdown (columns = c (image, molecularFormula)) #molecular formula was also HTML
substance/pt/58082
58-08-2
Caffeine
InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3
InChIKey=RYYVLZVUVIJVGH-UHFFFAOYSA-N
O=C1C2=C(N(C)C(=O)N1C)N=CN2C
O=C1C2=C(N=CN2C)N(C(=O)N1C)C
194.19
TRUE
Nice! This isn’t a gt
workshop, but there is a lot you can do with gt
to make publication-quality tables.
Iterating
The true power of accessing APIs programmatically is that we can iterate! For example, we can take a list of chemicals, search for the registry number for all of them, get the results for all of them, and turn it into a single output table.
Many APIs have a rate limit that limits how quickly you can make requests (e.g. 10 per minute). Sometimes this rate limit is documented on the API website, sometimes you’ll just get errors if you make too many requests (surprise!). Slow down your requests with req_throttle()
.
chemical_names <- c ("caffeine" , "vanillin" , "bergamotene" ) #vanilla earl grey anyone?
All queries will start like this
req_search_base <-
request ("https://commonchemistry.cas.org/api" ) |>
req_url_path_append ("search" )
Then we want to generate queries for each of the chemicals by plugging the elements of chemical_names
into req_url_query(q = <chemical_name>)
. We can do that with map()
from the purrr
package. The first argument is the thing to iterate over—chemical_names
in our case. The second argument is a function to perform. Here I’ve again used R’s “shortcut” for creating an anonymous function: \(arg_name) do_things_to(arg_name)
req_list <-
map (chemical_names, \(x) req_search_base |> req_url_query (q = x))
req_list
GET https://commonchemistry.cas.org/api/search?q=caffeine
GET https://commonchemistry.cas.org/api/search?q=vanillin
GET https://commonchemistry.cas.org/api/search?q=bergamotene
So now we have a list of three queries. We can use map()
again to perform all of them. This time, we don’t even need the anonymous function because req_perform()
just takes the request as its first unnamed argument.
resp_list <- map (req_list, req_perform)
resp_list
GET https://commonchemistry.cas.org/api/search?q=caffeine
Content-Type: application/json
Body: In memory (3893 bytes)
GET https://commonchemistry.cas.org/api/search?q=vanillin
Content-Type: application/json
Body: In memory (2682 bytes)
GET https://commonchemistry.cas.org/api/search?q=bergamotene
Content-Type: application/json
Body: In memory (2270 bytes)
And again, we can use map
to extract all the results
map (resp_list, resp_body_json)
[[1]]
[[1]]$count
[1] 1
[[1]]$results
[[1]]$results[[1]]
[[1]]$results[[1]]$rn
[1] "58-08-2"
[[1]]$results[[1]]$name
[1] "Caffeine"
[[1]]$results[[1]]$image
[1] "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-rendering=\"auto\" stroke-width=\"1\" stroke-opacity=\"1\" stroke-miterlimit=\"10\" stroke-linejoin=\"miter\" stroke-linecap=\"square\" stroke-dashoffset=\"0\" stroke-dasharray=\"none\" stroke=\"black\" shape-rendering=\"auto\" image-rendering=\"auto\" height=\"152.4\" font-weight=\"normal\" font-style=\"normal\" font-size=\"12\" font-family=\"'Dialog'\" fill-opacity=\"1\" fill=\"black\" color-rendering=\"auto\" color-interpolation=\"auto\" xmlns=\"http://www.w3.org/2000/svg\"><g><g stroke=\"white\" fill=\"white\"><rect y=\"0\" x=\"0\" width=\"6663\" stroke=\"none\" height=\"5080\"/></g><g transform=\"translate(32866,32758)\" text-rendering=\"geometricPrecision\" stroke-width=\"44\" stroke-linejoin=\"round\" stroke-linecap=\"round\"><line y2=\"-29668\" y1=\"-30807\" x2=\"-29642\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29810\" y1=\"-30665\" x2=\"-29813\" x1=\"-29813\" fill=\"none\"/><line y2=\"-29099\" y1=\"-29668\" x2=\"-30625\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29668\" y1=\"-29668\" x2=\"-29642\" x1=\"-27860\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-29642\" x1=\"-30437\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-28842\" x1=\"-29642\" fill=\"none\"/><line y2=\"-29562\" y1=\"-29099\" x2=\"-31425\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29888\" y1=\"-30807\" x2=\"-27672\" x1=\"-27672\" fill=\"none\"/><line y2=\"-31266\" y1=\"-30807\" x2=\"-30815\" x1=\"-31613\" fill=\"none\"/><line y2=\"-30807\" y1=\"-31266\" x2=\"-27672\" x1=\"-28467\" fill=\"none\"/><line y2=\"-30717\" y1=\"-31061\" x2=\"-27857\" x1=\"-28452\" fill=\"none\"/><line y2=\"-30807\" y1=\"-29888\" x2=\"-31613\" x1=\"-31613\" fill=\"none\"/><line y2=\"-29117\" y1=\"-29481\" x2=\"-27120\" x1=\"-27484\" fill=\"none\"/><line y2=\"-32202\" y1=\"-31595\" x2=\"-30625\" x1=\"-30625\" fill=\"none\"/><line y2=\"-29313\" y1=\"-29562\" x2=\"-32231\" x1=\"-31800\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" 
x2=\"-30555\" x1=\"-30555\" fill=\"none\"/><line y2=\"-28273\" y1=\"-29099\" x2=\"-30697\" x1=\"-30697\" fill=\"none\"/><line y2=\"-31162\" y1=\"-30746\" x2=\"-32367\" x1=\"-31647\" fill=\"none\"/><line y2=\"-31283\" y1=\"-30867\" x2=\"-32297\" x1=\"-31578\" fill=\"none\"/><text y=\"-27874\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-28750\" xml:space=\"preserve\" x=\"-27083\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28660\" xml:space=\"preserve\" x=\"-26505\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-32242\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-32153\" xml:space=\"preserve\" x=\"-30200\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-28914\" xml:space=\"preserve\" x=\"-32762\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-28825\" xml:space=\"preserve\" x=\"-32185\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-31150\" xml:space=\"preserve\" x=\"-32673\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-27822\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-30778\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-31197\" xml:space=\"preserve\" x=\"-28808\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text><text y=\"-29489\" xml:space=\"preserve\" x=\"-31766\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">N</text></g></g></svg>"
[[2]]
[[2]]$count
[1] 1
[[2]]$results
[[2]]$results[[1]]
[[2]]$results[[1]]$rn
[1] "121-33-5"
[[2]]$results[[1]]$name
[1] "Vanillin"
[[2]]$results[[1]]$image
[1] "<svg width=\"186.99\" viewBox=\"0 0 6233 2898\" text-rendering=\"auto\" stroke-width=\"1\" stroke-opacity=\"1\" stroke-miterlimit=\"10\" stroke-linejoin=\"miter\" stroke-linecap=\"square\" stroke-dashoffset=\"0\" stroke-dasharray=\"none\" stroke=\"black\" shape-rendering=\"auto\" image-rendering=\"auto\" height=\"86.94\" font-weight=\"normal\" font-style=\"normal\" font-size=\"12\" font-family=\"'Dialog'\" fill-opacity=\"1\" fill=\"black\" color-rendering=\"auto\" color-interpolation=\"auto\" xmlns=\"http://www.w3.org/2000/svg\"><g><g stroke=\"white\" fill=\"white\"><rect y=\"0\" x=\"0\" width=\"6233\" stroke=\"none\" height=\"2898\"/></g><g transform=\"translate(32866,32758)\" text-rendering=\"geometricPrecision\" stroke-width=\"44\" stroke-linejoin=\"round\" stroke-linecap=\"round\"><line y2=\"-32465\" y1=\"-31896\" x2=\"-29720\" x1=\"-30706\" fill=\"none\"/><line y2=\"-32245\" y1=\"-31821\" x2=\"-29758\" x1=\"-30498\" fill=\"none\"/><line y2=\"-31896\" y1=\"-30758\" x2=\"-30706\" x1=\"-30706\" fill=\"none\"/><line y2=\"-31896\" y1=\"-32465\" x2=\"-28735\" x1=\"-29720\" fill=\"none\"/><line y2=\"-30758\" y1=\"-30191\" x2=\"-30706\" x1=\"-29720\" fill=\"none\"/><line y2=\"-30836\" y1=\"-30408\" x2=\"-30498\" x1=\"-29758\" fill=\"none\"/><line y2=\"-30758\" y1=\"-31896\" x2=\"-28735\" x1=\"-28735\" fill=\"none\"/><line y2=\"-30902\" y1=\"-31754\" x2=\"-28906\" x1=\"-28906\" fill=\"none\"/><line y2=\"-30191\" y1=\"-30758\" x2=\"-29720\" x1=\"-28735\" fill=\"none\"/><line y2=\"-32422\" y1=\"-31896\" x2=\"-27828\" x1=\"-28735\" fill=\"none\"/><line y2=\"-32006\" y1=\"-32422\" x2=\"-27109\" x1=\"-27828\" fill=\"none\"/><line y2=\"-31936\" y1=\"-32248\" x2=\"-27268\" x1=\"-27808\" fill=\"none\"/><line y2=\"-32312\" y1=\"-31896\" x2=\"-31425\" x1=\"-30706\" fill=\"none\"/><line y2=\"-32063\" y1=\"-32312\" x2=\"-32231\" x1=\"-31800\" fill=\"none\"/><line y2=\"-30454\" y1=\"-30758\" x2=\"-31234\" x1=\"-30706\" fill=\"none\"/><text y=\"-32242\" xml:space=\"preserve\" 
x=\"-31766\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text><text y=\"-31665\" xml:space=\"preserve\" x=\"-32762\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">CH</text><text y=\"-31575\" xml:space=\"preserve\" x=\"-32185\" stroke=\"none\" font-size=\"313.3333\" font-family=\"sans-serif\">3</text><text y=\"-30056\" xml:space=\"preserve\" x=\"-31766\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">OH</text><text y=\"-31717\" xml:space=\"preserve\" x=\"-27074\" stroke=\"none\" font-size=\"433.3333\" font-family=\"sans-serif\">O</text></g></g></svg>"
[[3]]
[[3]]$count
[1] 1
[[3]]$results
[[3]]$results[[1]]
[[3]]$results[[1]]$rn
[1] "6895-56-3"
[[3]]$results[[1]]$name
[1] "Bergamotene"
[[3]]$results[[1]]$image
[1] "<svg width=\"230.52\" viewBox=\"0 0 7684 2975\" text-rendering=\"auto\" stroke-width=\"1\" stroke-opacity=\"1\" stroke-miterlimit=\"10\" stroke-linejoin=\"miter\" stroke-linecap=\"square\" stroke-dashoffset=\"0\" stroke-dasharray=\"none\" stroke=\"black\" shape-rendering=\"auto\" image-rendering=\"auto\" height=\"89.25\" font-weight=\"normal\" font-style=\"normal\" font-size=\"12\" font-family=\"'Dialog'\" fill-opacity=\"1\" fill=\"black\" color-rendering=\"auto\" color-interpolation=\"auto\" xmlns=\"http://www.w3.org/2000/svg\"><g><g stroke=\"white\" fill=\"white\"><rect y=\"0\" x=\"0\" width=\"7684\" stroke=\"none\" height=\"2975\"/></g><g transform=\"translate(32866,32866)\" text-rendering=\"geometricPrecision\" stroke-width=\"44\" stroke-linejoin=\"round\" stroke-linecap=\"round\"><line y2=\"-32289\" y1=\"-31714\" x2=\"-27245\" x1=\"-28227\" fill=\"none\"/><line y2=\"-31142\" y1=\"-32289\" x2=\"-27245\" x1=\"-27245\" fill=\"none\"/><line y2=\"-31714\" y1=\"-32289\" x2=\"-26262\" x1=\"-27245\" fill=\"none\"/><line y2=\"-31714\" y1=\"-30570\" x2=\"-28227\" x1=\"-28227\" fill=\"none\"/><line y2=\"-30570\" y1=\"-31142\" x2=\"-28227\" x1=\"-27245\" fill=\"none\"/><line y2=\"-30570\" y1=\"-31714\" x2=\"-26262\" x1=\"-26262\" fill=\"none\"/><line y2=\"-30570\" y1=\"-29995\" x2=\"-28227\" x1=\"-27245\" fill=\"none\"/><line y2=\"-29995\" y1=\"-30570\" x2=\"-27245\" x1=\"-26262\" fill=\"none\"/><line y2=\"-32240\" y1=\"-31714\" x2=\"-29134\" x1=\"-28227\" fill=\"none\"/><line y2=\"-31714\" y1=\"-32240\" x2=\"-30041\" x1=\"-29134\" fill=\"none\"/><line y2=\"-32240\" y1=\"-31714\" x2=\"-30948\" x1=\"-30041\" fill=\"none\"/><line y2=\"-31714\" y1=\"-32240\" x2=\"-31855\" x1=\"-30948\" fill=\"none\"/><line y2=\"-31659\" y1=\"-32052\" x2=\"-31673\" x1=\"-30992\" fill=\"none\"/><line y2=\"-32762\" y1=\"-31714\" x2=\"-28227\" x1=\"-28227\" fill=\"none\"/><line y2=\"-32240\" y1=\"-31714\" x2=\"-25355\" x1=\"-26262\" fill=\"none\"/><line y2=\"-32118\" y1=\"-31595\" 
x2=\"-25286\" x1=\"-26193\" fill=\"none\"/><line y2=\"-32240\" y1=\"-31714\" x2=\"-32762\" x1=\"-31855\" fill=\"none\"/><line y2=\"-30668\" y1=\"-31714\" x2=\"-31855\" x1=\"-31855\" fill=\"none\"/></g></g></svg>"
Now here it’s gotten a little complicated because we now have a list of lists. So instead of just map()
ing resp_body_json()
, let’s do some data extraction inside of map()
. It’s usually easiest to figure out how to do this by first testing it out on just a single element.
resp_list[[1 ]] |>
resp_body_json () |>
pluck ("results" , 1 ) |> #equivalent to x[["results"]][[1]]
as_tibble ()
# A tibble: 1 × 3
rn name image
<chr> <chr> <chr>
1 58-08-2 Caffeine "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-render…
Then we can put this code inside an anonymous function \(x)
and replace resp_list[[1]]
with x
df_list <- map (resp_list, \(x) {
x |>
resp_body_json () |>
pluck ("results" , 1 ) |> #equivalent to x[["results"]][[1]]
as_tibble ()
})
Now we have a list of tibbles that we can combine with list_rbind()
search_results <- list_rbind (df_list)
search_results
# A tibble: 3 × 3
rn name image
<chr> <chr> <chr>
1 58-08-2 Caffeine "<svg width=\"199.89\" viewBox=\"0 0 6663 5080\" text-r…
2 121-33-5 Vanillin "<svg width=\"186.99\" viewBox=\"0 0 6233 2898\" text-r…
3 6895-56-3 Bergamotene "<svg width=\"230.52\" viewBox=\"0 0 7684 2975\" text-r…
We can do the cool gt
thing with multiple rows now
gt (search_results) |>
fmt_markdown (columns = image)
58-08-2
Caffeine
121-33-5
Vanillin
6895-56-3
Bergamotene
Try it on your own!
As an exercise, use the three CAS registry numbers in search_results
to search the /detail
endpoint and make a table of chemical representations (InChI, InChIKey, SMILES, and canonical SMILES) for each molecule.