####----
# Program Name : adsl
# Protocol Name:
# Author :
# Creation Date: `<DDMMMYYYY>` by <initials>
# Purpose : Create ADSL dataset from SDTM datasets
# Modification : <DDMMMYYYY> by <initials>: <summary of changes>
####----
library(tidyverse)
library(lubridate)
library(xportr)
library(haven)
library(stringr)
##----------------------------------------------------------------------
## 0. Helper functions
##----------------------------------------------------------------------
# Function to group together missing strings and NAs
str_missing <- function(x) {
is.na(x) | str_trim(as.character(x)) == ""
}
# Function to check dataframe metadata
contents <- function(dat) {
row_contents <- function(nm, dat) {
var <- dat[[nm]]
data.frame(
Variable = nm,
Class = class(var)[1],
Label = ifelse(is.null(attr(var, "label")), "", attr(var, "label")),
Format = ifelse(is.null(attr(var, "SASformat")), "", attr(var, "SASformat")),
stringsAsFactors = FALSE
)
}
purrr::map_dfr(names(dat), row_contents, dat = dat)
}
##----------------------------------------------------------------------
## 1. Load SDTM from PHUSE Test Data Factory
##----------------------------------------------------------------------
sdtm <- "D:/R2SAS/r4sas/data/sdtm"
dm <- read_sas(paste0(sdtm, "/dm.sas7bdat"))
ex <- read_sas(paste0(sdtm, "/ex.sas7bdat"))
vs <- read_sas(paste0(sdtm, "/vs.sas7bdat"))
ds <- read_sas(paste0(sdtm, "/ds.sas7bdat"))
##----------------------------------------------------------------------
## 2. Derive DM-based variables (demo)
##----------------------------------------------------------------------
demo <- dm |>
# Keep only necessary columns
select(
STUDYID, USUBJID, SUBJID, SITEID,
AGE, AGEU, SEX, RACE, ETHNIC,
DTHDTC, ARM, ACTARM
) |>
mutate(
# Age groups
AGEGR1 = case_when(
AGE < 65 ~ "<65",
AGE >= 65 & AGE <= 70 ~ "65-70",
AGE > 70 ~ ">70",
TRUE ~ NA_character_
),
AGEGR1N = case_when(
AGEGR1 == "<65" ~ 1,
AGEGR1 == "65-70" ~ 2,
AGEGR1 == ">70" ~ 3,
TRUE ~ NA_real_
),
# Race numeric
RACEN = case_when(
RACE == "WHITE" ~ 1,
RACE == "BLACK OR AFRICAN AMERICAN" ~ 2,
RACE == "ASIAN" ~ 3,
RACE == "AMERICAN INDIAN OR ALASKA NATIVE" ~ 4,
RACE == "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER" ~ 5,
RACE == "UNKNOWN" ~ 6,
TRUE ~ NA_real_
),
# Planned treatment
TRT01P = ARM,
TRT01PN = case_when(
TRT01P == "Placebo" ~ 0,
TRT01P == "Xanomeline Low Dose" ~ 6,
TRT01P == "Xanomeline High Dose" ~ 9,
TRUE ~ NA_real_
),
# Actual treatment
TRT01A = ACTARM,
TRT01AN = case_when(
TRT01A == "Placebo" ~ 0,
TRT01A == "Xanomeline Low Dose" ~ 6,
TRT01A == "Xanomeline High Dose" ~ 9,
TRUE ~ NA_real_
),
# ITT flag: Y if ARM not missing
ITTFL = if_else(!str_missing(ARM), "Y", "N"),
# Death date
DTHDT = as_date(DTHDTC)
) |>
select(-DTHDTC)
##----------------------------------------------------------------------
## 3. EX-based variables: treatment dates, duration, dose
##----------------------------------------------------------------------
# Treatment dates
treat <- ex |>
select(USUBJID, EXSTDTC, EXENDTC) |>
mutate(
across(ends_with("DTC"), as_date)
) |>
group_by(USUBJID) |>
summarise(
TRTSDT = suppressWarnings(min(EXSTDTC, na.rm = TRUE)),
TRTEDT = suppressWarnings(max(EXENDTC, na.rm = TRUE)),
.groups = "drop"
) |>
mutate(
# Handle Inf from min/max when all values are NA
TRTSDT = replace(TRTSDT, is.infinite(TRTSDT), NA_Date_),
TRTEDT = replace(TRTEDT, is.infinite(TRTEDT), NA_Date_),
# Treatment duration (days)
TRTDURD = as.numeric(TRTEDT - TRTSDT + (TRTEDT >= TRTSDT))
)
# Dosing information
dose <- ex |>
group_by(USUBJID) |>
summarise(
DOSE01A = sum(EXDOSE, na.rm = TRUE),
.groups = "drop"
) |>
mutate(
DOSE01U = "mg"
) |>
select(USUBJID, DOSE01A, DOSE01U)
# Merge DM- and EX-based info
demo1 <- reduce(
list(demo, treat, dose),
.f = left_join,
by = "USUBJID"
) |>
mutate(
# Safety flag: Y if ITTFL = Y and TRTSDT not missing
SAFFL = if_else(ITTFL == "Y" & !str_missing(TRTSDT), "Y", "N")
)
##----------------------------------------------------------------------
## 4. DS-based variables: EOS, discontinuation, screen failure
##----------------------------------------------------------------------
demo2 <- demo1 |>
left_join(
ds |>
filter(DSCAT == "DISPOSITION EVENT") |>
select(USUBJID, DSCAT, DSDECOD, DSTERM),
by = "USUBJID"
) |>
mutate(
# End of study status
EOSSTT = case_when(
str_missing(DSDECOD) ~ "ONGOING",
DSDECOD == "COMPLETED" ~ "COMPLETED",
TRUE ~ "DISCONTINUED"
),
# Discontinuation reason and text
DCSREAS = if_else(
DSDECOD != "COMPLETED" & !str_missing(DSDECOD),
DSDECOD,
"",
missing = ""
),
DCSREASP = if_else(
DSDECOD != "COMPLETED" & !str_missing(DSTERM),
DSTERM,
"",
missing = ""
),
# Screen failure flag
SCRNFL = if_else(
DSDECOD == "SCREEN FAILURE",
"Y",
"N",
missing = "N"
)
) |>
select(-c(DSCAT, DSDECOD, DSTERM))
##----------------------------------------------------------------------
## 5. VS-based variables: weight, height, BMI
##----------------------------------------------------------------------
# Baseline weight (visit 3)
weight <- vs |>
filter(VSTESTCD == "WEIGHT", VISITNUM == 3) |>
select(USUBJID, WEIGHTBL = VSSTRESN)
# Baseline height (visit 1)
height <- vs |>
filter(VSTESTCD == "HEIGHT", VISITNUM == 1) |>
select(USUBJID, HEIGHTBL = VSSTRESN)
demo3 <- reduce(
list(demo2, weight, height),
.f = left_join,
by = "USUBJID"
) |>
mutate(
BMIBL = (WEIGHTBL / HEIGHTBL / HEIGHTBL) * 10000,
BMIGR1 = case_when(
BMIBL < 25 ~ "<25",
BMIBL >= 25 ~ ">=25",
TRUE ~ NA_character_
)
)
##----------------------------------------------------------------------
## 6. Metadata (dataset + variable level) and final ADSL
##----------------------------------------------------------------------
# Dataset-level metadata
dlm <- tribble(
~dataset, ~label,
"adsl", "Subject-Level Analysis Dataset"
)
# Variable-level metadata
vlm <- tribble(
~dataset, ~variable, ~label, ~type, ~format,
"adsl", "STUDYID", "Study Identifier", "character", NA_character_,
"adsl", "USUBJID", "Unique Subject Identifier", "character", NA_character_,
"adsl", "SUBJID", "Subject Identifier for the Study", "character", NA_character_,
"adsl", "SITEID", "Study Site Identifier", "character", NA_character_,
"adsl", "AGE", "Age", "numeric", NA_character_,
"adsl", "AGEU", "Age Units", "character", NA_character_,
"adsl", "AGEGR1", "Pooled Age Group 1", "character", NA_character_,
"adsl", "AGEGR1N", "Pooled Age Group 1 (N)", "numeric", NA_character_,
"adsl", "SEX", "Sex", "character", NA_character_,
"adsl", "RACE", "Race", "character", NA_character_,
"adsl", "RACEN", "Race (N)", "numeric", NA_character_,
"adsl", "ETHNIC", "Ethnicity", "character", NA_character_,
"adsl", "SAFFL", "Safety Population Flag", "character", NA_character_,
"adsl", "ITTFL", "Intent-To-Treat Population Flag", "character", NA_character_,
"adsl", "SCRNFL", "Screen Failure Population Flag", "character", NA_character_,
"adsl", "ARM", "Description of Planned Arm", "character", NA_character_,
"adsl", "ACTARM", "Description of Actual Arm", "character", NA_character_,
"adsl", "TRT01P", "Planned Treatment for Period 01", "character", NA_character_,
"adsl", "TRT01PN", "Planned Treatment for Period 01 (N)", "numeric", NA_character_,
"adsl", "TRT01A", "Actual Treatment for Period 01", "character", NA_character_,
"adsl", "TRT01AN", "Actual Treatment for Period 01 (N)", "numeric", NA_character_,
"adsl", "DOSE01A", "Actual Treatment Dose for Period 01", "numeric", NA_character_,
"adsl", "DOSE01U", "Units for Dose for Period 01", "character", NA_character_,
"adsl", "TRTSDT", "Date of First Exposure to Treatment", "Date", "DATE9.",
"adsl", "TRTEDT", "Date of Last Exposure to Treatment", "Date", "DATE9.",
"adsl", "TRTDURD", "Total Treatment Duration (Days)", "numeric", NA_character_,
"adsl", "EOSSTT", "End of Study Status", "character", NA_character_,
"adsl", "DCSREAS", "Reason for Discontinuation from Study", "character", NA_character_,
"adsl", "DCSREASP", "Reason Spec for Discont from Study", "character", NA_character_,
"adsl", "DTHDT", "Date of Death", "Date", "DATE9.",
"adsl", "WEIGHTBL", "Weight (kg) at Baseline", "numeric", NA_character_,
"adsl", "HEIGHTBL", "Height (cm) at Baseline", "numeric", NA_character_,
"adsl", "BMIBL", "Body Mass Index (kg/m2) at Baseline", "numeric", NA_character_,
"adsl", "BMIGR1", "Pooled BMI Group 1", "character", NA_character_
)
adsl <- demo3 |>
# Order variables
select(
STUDYID, USUBJID, SUBJID, SITEID,
AGE, AGEU, AGEGR1, AGEGR1N, SEX, RACE, RACEN, ETHNIC,
SAFFL, ITTFL, SCRNFL,
ARM, ACTARM,
TRT01P, TRT01PN, TRT01A, TRT01AN,
DOSE01A, DOSE01U,
TRTSDT, TRTEDT, TRTDURD,
EOSSTT, DCSREAS, DCSREASP,
DTHDT,
WEIGHTBL, HEIGHTBL, BMIBL, BMIGR1
) |>
arrange(USUBJID) |>
# Apply labels / types / formats
xportr_df_label(dlm, domain = "adsl") |>
xportr_label(vlm, domain = "adsl") |>
xportr_type(vlm, domain = "adsl") |>
xportr_format(vlm, domain = "adsl") |>
# Replace character NA with ""
mutate(across(where(is.character), ~ replace(., is.na(.), "")))
##----------------------------------------------------------------------
## 7. QC: label + metadata + export
##----------------------------------------------------------------------
# Check dataframe label
attr(adsl, "label")
# Check metadata
adsl |>
contents()
# Output to specified path
xportr_write(adsl, "./adsl.xpt")13 Creating ADaM: ADSL from SDTM-like Inputs
Note: Real ADSL creation must follow ADaM IG (derive flags, dates, imputations, populations). This example is educational only.
14 ADVS program
# ADVS Dataset Creation Program
# Analysis Dataset for Vital Signs
# Following CDISC ADaM Standards
# Load required libraries
library(dplyr)
library(lubridate)
library(haven)
# Function to create ADVS dataset
create_advs <- function(vs_data, adsl_data) {
# Step 1: Start with VS domain and merge with ADSL
advs <- vs_data %>%
# Filter for vital signs records
filter(! is.na(vsstresn) | !is.na(vsstresc)) %>%
# Merge with ADSL to get treatment information
left_join(adsl_data, by = c("studyid" = "STUDYID", "usubjid" = "USUBJID"))
# Step 2: Create basic variables
advs <- advs %>%
mutate(
# Study and subject identifiers
STUDYID = studyid,
USUBJID = usubjid,
# Treatment variables from ADSL
TRTP = TRT01P,
TRTPN = TRT01PN,
TRTA = TRT01A,
TRTAN = TRT01AN,
# Parameter information
PARAM = case_when(
vstestcd == "SYSBP" ~ "Systolic Blood Pressure (mmHg)",
vstestcd == "DIABP" ~ "Diastolic Blood Pressure (mmHg)",
vstestcd == "PULSE" ~ "Pulse Rate (beats/min)",
vstestcd == "RESP" ~ "Respiratory Rate (breaths/min)",
vstestcd == "TEMP" ~ "Body Temperature (C)",
vstestcd == "WEIGHT" ~ "Weight (kg)",
vstestcd == "HEIGHT" ~ "Height (cm)",
TRUE ~ vstest
),
PARAMCD = vstestcd,
PARAMN = case_when(
vstestcd == "SYSBP" ~ 1,
vstestcd == "DIABP" ~ 2,
vstestcd == "PULSE" ~ 3,
vstestcd == "RESP" ~ 4,
vstestcd == "TEMP" ~ 5,
vstestcd == "WEIGHT" ~ 6,
vstestcd == "HEIGHT" ~ 7,
TRUE ~ as.numeric(NA)
),
# Analysis value
AVAL = as.numeric(vsstresn),
# Date/time variables
ADT = as.Date(substr(vsdtc, 1, 10)),
ADY = as.numeric(ADT - as.Date(TRTSDT) + (ADT >= as.Date(TRTSDT))),
# Visit information
VISIT = visit,
ATPT = vstpt,
ATPTN = vstptnum
) %>%
# Filter to keep only records with non-missing AVAL
filter(! is.na(AVAL))
# Step 3: Create analysis visit windows (this would typically come from a separate mapping)
# For demonstration, creating basic visit windows
advs <- advs %>%
mutate(
AVISIT = case_when(
grepl("SCREENING|SCREEN", toupper(VISIT)) ~ "Screening",
grepl("BASELINE|DAY 1", toupper(VISIT)) ~ "Baseline",
grepl("WEEK 2|DAY 14", toupper(VISIT)) ~ "Week 2",
grepl("WEEK 4|DAY 28", toupper(VISIT)) ~ "Week 4",
grepl("WEEK 8|DAY 56", toupper(VISIT)) ~ "Week 8",
grepl("WEEK 12|DAY 84", toupper(VISIT)) ~ "Week 12",
grepl("FOLLOW", toupper(VISIT)) ~ "Follow-up",
TRUE ~ VISIT
),
AVISITN = case_when(
AVISIT == "Screening" ~ -1,
AVISIT == "Baseline" ~ 0,
AVISIT == "Week 2" ~ 2,
AVISIT == "Week 4" ~ 4,
AVISIT == "Week 8" ~ 8,
AVISIT == "Week 12" ~ 12,
AVISIT == "Follow-up" ~ 99,
TRUE ~ as.numeric(NA)
),
# Analysis window variables (example values)
AWTARGET = AVISITN * 7, # Target day
AWU = "DAYS",
AWLO = AWTARGET - 3, # 3-day window
AWHI = AWTARGET + 3,
AWTDIFF = abs(ADY - AWTARGET)
)
# Step 4: Determine baseline records
advs <- advs %>%
group_by(USUBJID, PARAMCD) %>%
mutate(
# Baseline flag - last assessment on or before treatment start date
baseline_candidate = case_when(
ADT <= as.Date(TRTSDT) ~ ADT,
TRUE ~ as.Date(NA)
)
) %>%
group_by(USUBJID, PARAMCD) %>%
mutate(
max_baseline_date = max(baseline_candidate, na.rm = TRUE),
ABLFL = case_when(
! is.na(baseline_candidate) & baseline_candidate == max_baseline_date ~ "Y",
TRUE ~ ""
)
) %>%
ungroup()
# Step 5: Create BASE, CHG, and PCHG
advs <- advs %>%
group_by(USUBJID, PARAMCD) %>%
mutate(
BASE = case_when(
any(ABLFL == "Y") ~ AVAL[ABLFL == "Y"][1],
TRUE ~ as. numeric(NA)
),
CHG = case_when(
! is.na(BASE) & ADT > as.Date(TRTSDT) ~ AVAL - BASE,
TRUE ~ as. numeric(NA)
),
PCHG = case_when(
!is.na(BASE) & BASE != 0 & ADT > as. Date(TRTSDT) ~ ((AVAL - BASE) / BASE) * 100,
TRUE ~ as.numeric(NA)
)
) %>%
ungroup()
# Step 6: Create analysis flags
advs <- advs %>%
mutate(
# ANL01FL - Analysis Flag 01 (include all valid records)
ANL01FL = "Y",
# DTYPE - Derivation Type (empty for observed values)
DTYPE = ""
) %>%
group_by(USUBJID, PARAMCD, AVISIT) %>%
mutate(
# ANL02FL - One record per visit per parameter
visit_rank = rank(AWTDIFF, ties.method = "first"),
ANL02FL = case_when(
ANL01FL == "Y" & visit_rank == 1 ~ "Y",
TRUE ~ ""
)
) %>%
ungroup()
# Step 7: Create criterion variables for blood pressure parameters
advs <- advs %>%
mutate(
# Criterion 1: Low BP
CRIT1 = case_when(
PARAMCD == "SYSBP" ~ "SYSBP <= 90 mmHg",
PARAMCD == "DIABP" ~ "DIABP <= 60 mmHg",
TRUE ~ ""
),
CRIT1FL = case_when(
PARAMCD == "SYSBP" & AVAL <= 90 ~ "Y",
PARAMCD == "DIABP" & AVAL <= 60 ~ "Y",
PARAMCD %in% c("SYSBP", "DIABP") ~ "",
TRUE ~ ""
),
# Criterion 2: High BP
CRIT2 = case_when(
PARAMCD == "SYSBP" ~ "SYSBP >= 140 mmHg",
PARAMCD == "DIABP" ~ "DIABP >= 90 mmHg",
TRUE ~ ""
),
CRIT2FL = case_when(
PARAMCD == "SYSBP" & AVAL >= 140 ~ "Y",
PARAMCD == "DIABP" & AVAL >= 90 ~ "Y",
PARAMCD %in% c("SYSBP", "DIABP") ~ "",
TRUE ~ ""
),
# Criterion 3: >20% increase from baseline
CRIT3 = case_when(
PARAMCD == "SYSBP" ~ "SYSBP change from baseline > 20% increase",
PARAMCD == "DIABP" ~ "DIABP change from baseline > 20% increase",
TRUE ~ ""
),
CRIT3FL = case_when(
PARAMCD == "SYSBP" & ! is.na(PCHG) & PCHG > 20 ~ "Y",
PARAMCD == "DIABP" & !is.na(PCHG) & PCHG > 20 ~ "Y",
PARAMCD %in% c("SYSBP", "DIABP") & !is.na(PCHG) ~ "",
TRUE ~ ""
),
# Criterion 4: >20% decrease from baseline
CRIT4 = case_when(
PARAMCD == "SYSBP" ~ "SYSBP change from baseline > 20% decrease",
PARAMCD == "DIABP" ~ "DIABP change from baseline > 20% decrease",
TRUE ~ ""
),
CRIT4FL = case_when(
PARAMCD == "SYSBP" & !is.na(PCHG) & PCHG < -20 ~ "Y",
PARAMCD == "DIABP" & !is. na(PCHG) & PCHG < -20 ~ "Y",
PARAMCD %in% c("SYSBP", "DIABP") & !is.na(PCHG) ~ "",
TRUE ~ ""
)
)
# Step 8: Select final variables in correct order
advs_final <- advs %>%
select(
STUDYID, USUBJID, TRTP, TRTPN, TRTA, TRTAN,
PARAM, PARAMCD, PARAMN, AVAL, BASE, CHG, PCHG,
DTYPE, ADT, ADY, AVISIT, AVISITN, AWTARGET, AWTDIFF,
AWLO, AWHI, AWU, VISIT, ATPT, ATPTN,
ABLFL, ANL01FL, ANL02FL,
CRIT1, CRIT1FL, CRIT2, CRIT2FL, CRIT3, CRIT3FL, CRIT4, CRIT4FL,
# Include core ADSL variables
AGE, AGEU, AGEGR1, AGEGR1N, SEX, RACE, RACEN, SAFFL, ITTFL
) %>%
arrange(STUDYID, USUBJID, PARAMN, ADT, ATPTN)
return(advs_final)
}
# Example usage:
# Load your data
# vs <- read_sas("path/to/vs.sas7bdat")
# adsl <- read_sas("path/to/adsl. sas7bdat")
# Create ADVS dataset
# advs <- create_advs(vs, adsl)
# Write to file
# write_sas(advs, "path/to/advs.sas7bdat")
# Print summary
# cat("ADVS dataset created with", nrow(advs), "records\n")
# cat("Parameters included:", paste(unique(advs$PARAMCD), collapse = ", "), "\n")
# cat("Subjects included:", length(unique(advs$USUBJID)), "\n")Exercises 1. Add analysis populations (e.g., SAFFL, FASFL) based on simple rules. 2. Derive AGEGR1 as <65 / ≥65 and use ordered factor. 3. Add a treatment end date TRTEDT and compute treatment duration.