4 Seagrass Density to DWC eMoF format

Author

ZL Monteith, Hakai Institute

Published

July 21, 2025

4.1 Hakai Seagrass

4.1.1 Setup

This section clears the workspace, checks the working directory, and installs packages (if required) and loads packages, and loads necessary datasets

library("knitr")
# Knitr global chunk options
opts_chunk$set(message = FALSE,
               warning = FALSE,
               error   = FALSE)

4.1.1.1 Load Data

First load the seagrass density survey data, set variable classes, and have a quick look

# Load density data
seagrassDensity <- 
  read.csv(seagrassDensity_csv,
           colClass = "character") %>%
  mutate(date             = ymd(date),
         depth            = as.numeric(depth),
         transect_dist    = factor(transect_dist),
         collected_start  = ymd_hms(collected_start),
         collected_end    = ymd_hms(collected_end),
         density          = as.numeric(density),
         density_msq      = as.numeric(density_msq),
         canopy_height_cm = as.numeric(canopy_height_cm),
         flowering_shoots = as.numeric(flowering_shoots)) %T>%
  glimpse()

Rows: 3,031
Columns: 22
$ X                <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "1…
$ organization     <chr> "HAKAI", "HAKAI", "HAKAI", "HAKAI", "HAKAI", "HAKAI",…
$ work_area        <chr> "CALVERT", "CALVERT", "CALVERT", "CALVERT", "CALVERT"…
$ project          <chr> "MARINEGEO", "MARINEGEO", "MARINEGEO", "MARINEGEO", "…
$ survey           <chr> "PRUTH_BAY", "PRUTH_BAY", "PRUTH_BAY", "PRUTH_BAY", "…
$ site_id          <chr> "PRUTH_BAY_INTERIOR4", "PRUTH_BAY_INTERIOR4", "PRUTH_…
$ date             <date> 2016-05-13, 2016-05-13, 2016-05-13, 2016-05-13, 2016…
$ sampling_bout    <chr> "4", "4", "4", "4", "4", "4", "4", "6", "6", "6", "6"…
$ dive_supervisor  <chr> "Zach", "Zach", "Zach", "Zach", "Zach", "Zach", "Zach…
$ collector        <chr> "Derek", "Derek", "Derek", "Derek", "Derek", "Derek",…
$ hakai_id         <chr> "2016-05-13_PRUTH_BAY_INTERIOR4_0", "2016-05-13_PRUTH…
$ sample_type      <chr> "seagrass_density", "seagrass_density", "seagrass_den…
$ depth            <dbl> 6.0, 6.0, 6.0, 6.0, 5.0, 6.0, 6.0, 9.1, 9.0, 8.9, 9.0…
$ transect_dist    <fct> 0, 5, 10, 15, 20, 25, 30, 10, 15, 20, 25, 30, 0, 5, 1…
$ collected_start  <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ collected_end    <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ density          <dbl> 13, 10, 18, 22, 16, 31, 9, 5, 6, 6, 6, 3, 13, 30, 23,…
$ density_msq      <dbl> 208, 160, 288, 352, 256, 496, 144, 80, 96, 96, 96, 48…
$ canopy_height_cm <dbl> 60, 63, 80, 54, 55, 50, 63, 85, 80, 90, 95, 75, 60, 6…
$ flowering_shoots <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, NA, NA, NA…
$ comments         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ quality_log      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…

Next, load the habitat survey data, and same as above, set variable classes as necessary, and have a quick look.

# load habitat data, set variable classes, have a quick look
seagrassHabitat <-
  read.csv(seagrassHabitat_csv,
           colClasses = "character") %>%  
  mutate(date            = ymd(date),
         depth           = as.numeric(depth),
         hakai_id        = str_pad(hakai_id, 5, pad = "0"),
         transect_dist   = factor(transect_dist),
         collected_start = ymd_hms(collected_start),
         collected_end   = ymd_hms(collected_end)) %T>%
  glimpse()

Rows: 2,052
Columns: 28
$ X                <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "1…
$ organization     <chr> "HAKAI", "HAKAI", "HAKAI", "HAKAI", "HAKAI", "HAKAI",…
$ work_area        <chr> "CALVERT", "CALVERT", "CALVERT", "CALVERT", "CALVERT"…
$ project          <chr> "MARINEGEO", "MARINEGEO", "MARINEGEO", "MARINEGEO", "…
$ survey           <chr> "CHOKED_PASS", "CHOKED_PASS", "CHOKED_PASS", "CHOKED_…
$ site_id          <chr> "CHOKED_PASS_INTERIOR6", "CHOKED_PASS_INTERIOR6", "CH…
$ date             <date> 2017-11-22, 2017-11-22, 2017-11-22, 2017-11-22, 2017…
$ sampling_bout    <chr> "6", "6", "6", "6", "6", "6", "1", "1", "1", "1", "1"…
$ dive_supervisor  <chr> "gillian", "gillian", "gillian", "gillian", "gillian"…
$ collector        <chr> "zach", "zach", "zach", "zach", "zach", "zach", "kyle…
$ hakai_id         <chr> "10883", "2017-11-22_CHOKED_PASS_INTERIOR6_5 - 10", "…
$ sample_type      <chr> "seagrass_habitat", "seagrass_habitat", "seagrass_hab…
$ depth            <dbl> 9.2, 9.4, 9.3, 9.0, 9.2, 9.2, 3.4, 3.4, 3.4, 3.4, 3.4…
$ transect_dist    <fct> 0 - 5, 10-May, 15-Oct, 15 - 20, 20 - 25, 25 - 30, 0 -…
$ collected_start  <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ collected_end    <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ bag_uid          <chr> "10883", NA, NA, "11094", NA, "11182", "7119", NA, "7…
$ bag_number       <chr> "3557", NA, NA, "3520", NA, "903", "800", NA, "318", …
$ density_range    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ substrate        <chr> "sand,shell hash", "sand,shell hash", "sand,shell has…
$ patchiness       <chr> "< 1", "< 1", "02-Jan", "< 1", "< 1", "< 1", "< 1", "…
$ adj_habitat_1    <chr> "seagrass", "seagrass", "seagrass", "seagrass", "seag…
$ adj_habitat_2    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ sample_collected <chr> "TRUE", "FALSE", "FALSE", "TRUE", "FALSE", "TRUE", "T…
$ vegetation_1     <chr> NA, NA, NA, NA, NA, NA, "des", NA, "des", NA, NA, NA,…
$ vegetation_2     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ comments         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ quality_log      <chr> "1: Flowering shoots 0 for entire transects", NA, NA,…

Finally, load coordinate data for surveys, and subset necessary variables

coordinates <- 
  read.csv(coordinate_csv,
           colClass = c("Point.Name" = "character")) %>%
  select(Point.Name, Decimal.Lat, Decimal.Long) %T>%
  glimpse()

Rows: 70
Columns: 3
$ Point.Name   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Decimal.Lat  <dbl> 52.06200, 52.05200, 51.92270, 51.92500, 51.80900, 51.8090…
$ Decimal.Long <dbl> -128.4120, -128.4030, -128.4648, -128.4540, -128.2360, -1…

4.1.1.2 Merge Datasets

Now all the datasets have been loaded, and briefly formatted, we’ll join together the habitat and density surveys, and the coordinates for these.

The seagrass density surveys collect data at discrete points (ie. 5 metres) along the transects, while the habitat surveys collect data over sections (ie. 0 - 5 metres) along the transects. In order to fit these two surveys together, we’ll narrow the habitat surveys from a range to a point so the locations will match. Based on how the habitat data is collected, the point the habitat survey is applied to will be the distance at the end of the swath (ie. 10-15m will become 15m). To account for no preceeding distance, the 0m distance will use the 0-5m section of the survey.

First, well make the necessary transformations to the habitat dataset.

# Reformat seagrassHabitat to merge with seagrassDensity
## replicate 0 - 5m transect dist to match with 0m in density survey;
## rest of habitat bins can map one to one with density (ie. 5 - 10m -> 10m)
seagrass0tmp <- 
  seagrassHabitat %>%
  filter(transect_dist %in% c("0 - 5", "0 - 2.5")) %>%
  mutate(transect_dist = factor(0))

## collapse various levels to match with seagrassDensity transect_dist
seagrassHabitat$transect_dist <- 
  fct_collapse(seagrassHabitat$transect_dist,
               "5" = c("0 - 5", "2.5 - 7.5"),
               "10" = c("5 - 10", "7.5 - 12.5"),
               "15" = c("10 - 15", "12.5 - 17.5"),
               "20" = c("15 - 20", "17.5 - 22.5"),
               "25" = c("20 - 25", "22.5 - 27.5"),
               "30" = c("25 - 30", "27.5 - 30"))

## merge seagrass0tmp into seagrassHabitat to account for 0m samples,
## set class for date, datetime variables
seagrassHabitatFull <- 
  rbind(seagrass0tmp, seagrassHabitat) %>%
  filter(transect_dist != "0 - 2.5")  %>% # already captured in seagrass0tmp 
  droplevels(.)  # remove now unused factor levels

With the distances of habitat and density surveys now corresponding, we can now merge these two datasets plus there coordinates together, combine redundant fields, and remove unnecessary fields.

# Merge seagrassHabitatFull with seagrassDensity, then coordinates
seagrass <- 
  full_join(seagrassHabitatFull, seagrassDensity, 
            by = c("organization",
                   "work_area",
                   "project",
                   "survey",
                   "site_id", 
                   "date",
                   "transect_dist")) %>%
  # merge hakai_id.x and hakai_id.y into single variable field;
  # use combination of date, site_id, transect_dist, and field uid (hakai_id 
  # when present)
  mutate(field_uid = ifelse(sample_collected == TRUE, hakai_id.x, "NA"),
         hakai_id = paste(date, "HAKAI:CALVERT", site_id, transect_dist, sep = ":"),
         # below, aggregate metadata that didn't merge naturally (ie. due to minor 
         # differences in watch time or depth gauges)
         dive_supervisor = dive_supervisor.x,
         collected_start = ymd_hms(ifelse(is.na(collected_start.x),
                                          collected_start.y, 
                                          collected_start.x)),
         collected_end   = ymd_hms(ifelse(is.na(collected_start.x),
                                          collected_start.y,
                                          collected_start.x)),
         depth_m         = ifelse(is.na(depth.x), depth.y, depth.x),
         sampling_bout   = sampling_bout.x) %>%
  left_join(., coordinates,  # add coordinates
            by = c("site_id" = "Point.Name")) %>%
  select( - c(X.x, X.y, hakai_id.x, hakai_id.y,  # remove unnecessary variables
              dive_supervisor.x, dive_supervisor.y,
              collected_start.x, collected_start.y,
              collected_end.x, collected_end.y,
              depth.x, depth.y,
              sampling_bout.x, sampling_bout.y)) %>%
  mutate(density_msq = as.character(density_msq),
         canopy_height_cm = as.character(canopy_height_cm),
         flowering_shoots = as.character(flowering_shoots),
         depth_m = as.character(depth_m)) %T>%
  glimpse()

Rows: 3,743
Columns: 38
$ organization     <chr> "HAKAI", "HAKAI", "HAKAI", "HAKAI", "HAKAI", "HAKAI",…
$ work_area        <chr> "CALVERT", "CALVERT", "CALVERT", "CALVERT", "CALVERT"…
$ project          <chr> "MARINEGEO", "MARINEGEO", "MARINEGEO", "MARINEGEO", "…
$ survey           <chr> "CHOKED_PASS", "CHOKED_PASS", "CHOKED_PASS", "PRUTH_B…
$ site_id          <chr> "CHOKED_PASS_INTERIOR6", "CHOKED_PASS_EDGE1", "CHOKED…
$ date             <date> 2017-11-22, 2017-05-19, 2017-05-19, 2017-07-03, 2017…
$ collector.x      <chr> "zach", "kyle", NA, "tanya", "zach", "zach", "zach", …
$ sample_type.x    <chr> "seagrass_habitat", "seagrass_habitat", "seagrass_hab…
$ transect_dist    <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ bag_uid          <chr> "10883", "7119", "7031", "2352", "10255", "10023", "1…
$ bag_number       <chr> "3557", "800", "301", "324", "3506", "3555", "3534", …
$ density_range    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ substrate        <chr> "sand,shell hash", "sand,shell hash", "sand,shell has…
$ patchiness       <chr> "< 1", "< 1", "< 1", "< 1", "< 1", "05-Apr", "04-Mar"…
$ adj_habitat_1    <chr> "seagrass", "sand", "standing kelp", "seagrass", "sea…
$ adj_habitat_2    <chr> NA, NA, NA, NA, NA, NA, "standing kelp", NA, NA, NA, …
$ sample_collected <chr> "TRUE", "TRUE", "TRUE", "TRUE", "TRUE", "TRUE", "TRUE…
$ vegetation_1     <chr> NA, "des", "des", "zm", "des", NA, NA, NA, NA, NA, NA…
$ vegetation_2     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ comments.x       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ quality_log.x    <chr> "1: Flowering shoots 0 for entire transects", NA, NA,…
$ collector.y      <chr> "derek", "ondine", "ondine", "derek", "derek", "derek…
$ sample_type.y    <chr> "seagrass_density", "seagrass_density", "seagrass_den…
$ density          <dbl> 4, 10, 6, 13, 6, 1, 2, 6, 21, 3, 7, 4, 3, 14, 17, 11,…
$ density_msq      <chr> "64", "160", "96", "208", "96", "16", "32", "96", "33…
$ canopy_height_cm <chr> "80", "80", "110", "60", "125", "100", "100", "125", …
$ flowering_shoots <chr> "0", NA, NA, NA, NA, NA, NA, "0", NA, NA, NA, "0", NA…
$ comments.y       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ quality_log.y    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ field_uid        <chr> "10883", "07119", "07031", "02352", "10255", "10023",…
$ hakai_id         <chr> "2017-11-22:HAKAI:CALVERT:CHOKED_PASS_INTERIOR6:0", "…
$ dive_supervisor  <chr> "gillian", "gillian,gillian.sadlierbrown", "gillian,g…
$ collected_start  <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ collected_end    <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ depth_m          <chr> "9.2", "3.4", "4.8", "2.4", "5.3", "5.6", "4.4", "2.5…
$ sampling_bout    <chr> "6", "1", "3", "5", "5", "3", "5", "2", "1", "2", "6"…
$ Decimal.Lat      <dbl> 51.67482, 51.67882, 51.67493, 51.64532, 51.67349, 51.…
$ Decimal.Long     <dbl> -128.1195, -128.1148, -128.1237, -128.1193, -128.1180…

4.1.2 Convert Data to Darwin Core - Extended Measurement or Fact format

The Darwin Core ExtendedMeasurementOrFact (eMoF) extension bases records around a core event (rather than occurrence as in standard Darwin Core), allowing for additional measurement variables to be associated with occurrence data.

4.1.2.1 Add Event ID and Occurrence ID variables to dataset

As this dataset will be annually updated, rather than using natural keys (ie. using package::uuid to autogenerate) for event and occurence IDs, here we will use surrogate keys made up of a concatenation of date survey, transect location, observation distance, and sample ID (for occurrenceID, when a sample is present).

# create and populate eventID variable
## currently only event is used, but additional surveys and abiotic data
## are associated with parent events that may be included at a later date
seagrass$eventID <- seagrass$hakai_id

# create and populate occurrenceID; combine eventID with transect_dist 
# and field_uid
## in the event of <NA> field_uid, no sample was collected, but
## measurements and occurrence are still taken; no further subsamples
## are associated with <NA> field_uids
seagrass$occurrenceID <- 
  with(seagrass, 
       paste(eventID, transect_dist, field_uid, sep = ":"))

4.1.2.2 Create Event, Occurrence, and eMoF tables

Now that we’ve created eventIDs and occurrenceIDs to connect all the variables together, we can begin to create the Event, Occurrence, and extended Measurement or Fact table necessary for DarwinCore compliant datasets

4.1.2.2.1 Event Table

# subset seagrass to create event table
seagrassEvent <-
  seagrass %>%
  distinct %>%  # some duplicates in data stemming from database conflicts
  select(date,
         Decimal.Lat, Decimal.Long, transect_dist,
         depth_m, eventID) %>%
  rename(eventDate                     = date,
         decimalLatitude               = Decimal.Lat,
         decimalLongitude              = Decimal.Long,
         coordinateUncertaintyInMeters = transect_dist,
         minimumDepthInMeters          = depth_m,
         maximumDepthInMeters          = depth_m) %>%
  mutate(geodeticDatum  = "WGS84",
         samplingEffort = "30 metre transect") %T>% glimpse

Rows: 3,659
Columns: 8
$ eventDate                     <date> 2017-11-22, 2017-05-19, 2017-05-19, 201…
$ decimalLatitude               <dbl> 51.67482, 51.67882, 51.67493, 51.64532, …
$ decimalLongitude              <dbl> -128.1195, -128.1148, -128.1237, -128.11…
$ coordinateUncertaintyInMeters <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ maximumDepthInMeters          <chr> "9.2", "3.4", "4.8", "2.4", "5.3", "5.6"…
$ eventID                       <chr> "2017-11-22:HAKAI:CALVERT:CHOKED_PASS_IN…
$ geodeticDatum                 <chr> "WGS84", "WGS84", "WGS84", "WGS84", "WGS…
$ samplingEffort                <chr> "30 metre transect", "30 metre transect"…

# save event table to csv
write.csv(seagrassEvent, seagrassEvent_csv)

4.1.2.2.2 Occurrence Table

# subset seagrass to create occurrence table
seagrassOccurrence <-
  seagrass %>%
  distinct %>%  # some duplicates in data stemming from database conflicts
  select(eventID, occurrenceID) %>%
  mutate(basisOfRecord = "HumanObservation",
         scientificName   = "Zostera subg. Zostera marina",
         occurrenceStatus = "present")

# Taxonomic name matching
# in addition to the above metadata, DarwinCore format requires further
# taxonomic data that can be acquired through the WoRMS register.
## Load taxonomic info, downloaded via WoRMS tool
# zmWorms <- 
#   read.delim("raw_data/zmworms_matched.txt",
#              header = TRUE,
#              nrows  = 1)

zmWorms <- wm_record(id = 145795)

# join WoRMS name with seagrassOccurrence create above
seagrassOccurrence <- 
  full_join(seagrassOccurrence, zmWorms, 
            by = c("scientificName" = "scientificname")) %>%
  select(eventID, occurrenceID, basisOfRecord, scientificName, occurrenceStatus, AphiaID,
         url, authority, status, unacceptreason, taxonRankID, rank,
         valid_AphiaID, valid_name, valid_authority, parentNameUsageID,
         kingdom, phylum, class, order, family, genus, citation, lsid,
         isMarine, match_type, modified) %T>%
  glimpse

Rows: 3,660
Columns: 27
$ eventID           <chr> "2017-11-22:HAKAI:CALVERT:CHOKED_PASS_INTERIOR6:0", …
$ occurrenceID      <chr> "2017-11-22:HAKAI:CALVERT:CHOKED_PASS_INTERIOR6:0:0:…
$ basisOfRecord     <chr> "HumanObservation", "HumanObservation", "HumanObserv…
$ scientificName    <chr> "Zostera subg. Zostera marina", "Zostera subg. Zoste…
$ occurrenceStatus  <chr> "present", "present", "present", "present", "present…
$ AphiaID           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ url               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ authority         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ status            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ unacceptreason    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ taxonRankID       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ rank              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ valid_AphiaID     <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ valid_name        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ valid_authority   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ parentNameUsageID <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ kingdom           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ phylum            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ class             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ order             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ family            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ genus             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ citation          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ lsid              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ isMarine          <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ match_type        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ modified          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …

# save occurrence table to csv
write.csv(seagrassOccurrence, seagrassOccurrence_csv)

4.1.2.2.3 Extended MeasurementOrFact table

seagrassMof <-
  seagrass %>%
  # select variables for eMoF table
  select(date,
         eventID, survey, site_id, transect_dist,
         substrate, patchiness, adj_habitat_1, adj_habitat_2,
         vegetation_1, vegetation_2,
         density_msq, canopy_height_cm, flowering_shoots) %>%
  # split substrate into two variables (currently holds two substrate type in same variable)
  separate(substrate, sep = ",", into = c("substrate_1", "substrate_2")) %>%
  # change variables names to match NERC database (or to be more descriptive where none exist)
  rename(measurementDeterminedDate   = date,
         SubstrateTypeA              = substrate_1,
         SubstrateTypeB              = substrate_2,
         BarePatchLengthWithinSeagrass = patchiness,
         PrimaryAdjacentHabitat      = adj_habitat_1,
         SecondaryAdjacentHabitat    = adj_habitat_2,
         PrimaryAlgaeSp              = vegetation_1,
         SecondaryAlgaeSp            = vegetation_2,
         BedAbund                    = density_msq,
         CanopyHeight                = canopy_height_cm,
         FloweringBedAbund           = flowering_shoots) %>%  
  # reformat variables into DwC MeasurementOrFact format
  # (single values variable, with measurement type, unit, etc. variables)
  pivot_longer( - c(measurementDeterminedDate, eventID, survey, site_id, transect_dist),
                names_to = "measurementType",
                values_to = "measurementValue",
                values_ptypes = list(measurementValue = "character")) %>% 
  # use measurement type to fill in remainder of variables relating to 
  # NERC vocabulary and metadata fields
  mutate(
    measurementTypeID = case_when(
      measurementType == "BedAbund" ~ "http://vocab.nerc.ac.uk/collection/P01/current/SDBIOL02/",
      measurementType == "CanopyHeight" ~ "http://vocab.nerc.ac.uk/collection/P01/current/OBSMAXLX/",
      # measurementType == "BarePatchWithinSeagrass" ~ "",
      measurementType == "FloweringBedAbund" ~ "http://vocab.nerc.ac.uk/collection/P01/current/SDBIOL02/"),
    measurementUnit = case_when(
      measurementType == "BedAbund" ~ "Number per square metre",
      measurementType == "CanopyHeight" ~ "Centimetres",
      measurementType == "BarePatchhLengthWithinSeagrass" ~ "Metres",
      measurementType == "FloweringBedAbund" ~ "Number per square metre"),
    measurementUnitID = case_when(
      measurementType == "BedAbund" ~ "http://vocab.nerc.ac.uk/collection/P06/current/UPMS/",
      measurementType == "CanopyHeight" ~ "http://vocab.nerc.ac.uk/collection/P06/current/ULCM/",
      measurementType == "BarePatchhLengthWithinSeagrass" ~ "http://vocab.nerc.ac.uk/collection/P06/current/ULAA/2/",
      measurementType == "FloweringBedAbund" ~ "http://vocab.nerc.ac.uk/collection/P06/current/UPMS/"),
    measurementAccuracy = case_when(
      measurementType == "CanopyHeight" ~ 5),
    measurementMethod = case_when(
      measurementType == "BedAbund" ~ "25cmx25cm quadrat count",
      measurementType == "CanopyHeight" ~ "in situ with ruler",
      measurementType == "BarePatchhLengthWithinSeagrass" ~ "estimated along transect line",
      measurementType == "FloweringBedAbund" ~ "25cmx25cm quadrat count")) %>%
  select(eventID, measurementDeterminedDate, measurementType, measurementValue,
         measurementTypeID, measurementUnit, measurementUnitID, measurementAccuracy,
         measurementMethod) %T>%
#  select(!c(survey, site_id, transect_dist)) %T>%
  glimpse()

Rows: 37,430
Columns: 9
$ eventID                   <chr> "2017-11-22:HAKAI:CALVERT:CHOKED_PASS_INTERI…
$ measurementDeterminedDate <date> 2017-11-22, 2017-11-22, 2017-11-22, 2017-11…
$ measurementType           <chr> "SubstrateTypeA", "SubstrateTypeB", "BarePat…
$ measurementValue          <chr> "sand", "shell hash", "< 1", "seagrass", NA,…
$ measurementTypeID         <chr> NA, NA, NA, NA, NA, NA, NA, "http://vocab.ne…
$ measurementUnit           <chr> NA, NA, NA, NA, NA, NA, NA, "Number per squa…
$ measurementUnitID         <chr> NA, NA, NA, NA, NA, NA, NA, "http://vocab.ne…
$ measurementAccuracy       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 5, NA, NA, N…
$ measurementMethod         <chr> NA, NA, NA, NA, NA, NA, NA, "25cmx25cm quadr…

# save eMoF table to csv
write.csv(seagrassMof, seagrassMof_csv)

4.1.3 Session Info

Print session information below in case necessary for future reference

# Print Session Info for future reference
sessionInfo()

R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.7.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: UTC
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] worrms_0.4.3    magrittr_2.0.3  lubridate_1.9.4 forcats_1.0.0  
 [5] stringr_1.5.1   dplyr_1.1.4     purrr_1.1.0     readr_2.1.5    
 [9] tidyr_1.3.1     tibble_3.3.0    ggplot2_3.5.2   tidyverse_2.0.0
[13] here_1.0.1      knitr_1.50     

loaded via a namespace (and not attached):
 [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.4.1     Rcpp_1.1.0        
 [5] tidyselect_1.2.1   urltools_1.7.3.1   scales_1.4.0       triebeard_0.4.1   
 [9] yaml_2.3.10        fastmap_1.2.0      R6_2.6.1           generics_0.1.4    
[13] curl_6.4.0         htmlwidgets_1.6.4  crul_1.5.0         rprojroot_2.1.0   
[17] pillar_1.11.0      RColorBrewer_1.1-3 tzdb_0.5.0         rlang_1.1.6       
[21] httpcode_0.3.0     stringi_1.8.7      xfun_0.52          timechange_0.3.0  
[25] cli_3.6.5          withr_3.0.2        digest_0.6.37      grid_4.4.1        
[29] hms_1.1.3          lifecycle_1.0.4    vctrs_0.6.5        evaluate_1.0.4    
[33] glue_1.8.0         farver_2.1.2       rmarkdown_2.29     tools_4.4.1       
[37] pkgconfig_2.0.3    htmltools_0.5.8.1