## Code to read the biospecimen and clinical data from the JSON files
# Install Hmisc only when it is missing. This has to happen before
# library(Hmisc) is called, and it avoids re-downloading the package on
# every run of the script.
if (!requireNamespace('Hmisc', quietly = TRUE)) {
  install.packages('Hmisc')
}

# Attach order is kept as in the original script, because later packages
# mask same-named functions from earlier ones.
library(jsonlite)
library(dplyr)
library(Hmisc)
library(data.table)

# Parse the biospecimen cart export (JSON) into an R object.
tmp_biospecimen <- fromJSON('C:/Users/thood/Downloads/biospecimen.cart.2016-09-07T21-46-35.302439.json')

# Stack the per-case `samples` entries into a single data.table.
tmp_biospecimen_2 <- tmp_biospecimen$samples %>%
  rbindlist() %>%
  data.table()

# Summarise every column except `portions`.
tmp_biospecimen_2 %>%
  select(-portions) %>%
  describe()

# Tabulate the distinct sample_id values in sorted order.
# NOTE(review): the original comment mentions inspecting sample_type, but the
# column tabulated here is sample_id -- confirm which one was intended.
tmp_biospecimen_2 %>%
  select(sample_id) %>%
  unique() %>%
  arrange(sample_id) %>%
  table()

# Parse the clinical cart export (JSON) into an R object.
tmp_clin <- fromJSON('C:/Users/thood/Downloads/clinical.cart.2016-09-07T21-34-57.820511.json')

# Number of case_id entries in the clinical export.
tmp_clin$case_id %>% length()

# Stack the per-case `diagnoses` entries into a single data.table.
tmp_clin2 <- tmp_clin$diagnoses %>% rbindlist() %>% data.table()

# Summarise the top-level clinical object.
# NOTE(review): tmp_clin still carries the nested `diagnoses` field here;
# confirm describe() handles it, or drop nested columns first as is done for
# `portions` in the biospecimen section.
tmp_clin %>% describe()

# Summarise the flattened diagnoses table. The original line piped an
# undefined object `e` into describe(), which stops the script with
# "object 'e' not found"; tmp_clin2 is the table built just above.
tmp_clin2 %>% describe()



# List the entries of the TCGA-LAML download directory (a folder of folders).
test <- list.files(path = 'C:/Users/thood/Documents/R/TCGA/TCGA-LAML')

# Look up a single case by its case_id in the biospecimen data.
# Per the original note, this id matches the annotation entity_id of each
# zipped folder inside each downloaded TCGA folder.
tmp_biospecimen %>%
  select(case_id) %>%
  filter(case_id %in% 'f3c7fc84-3df8-4ff7-a378-26ec5d9e08a5') %>%
  arrange(case_id)



# Look up a single sample by its sample_id in the flattened samples table.
tmp_biospecimen_2 %>%
  select(sample_id) %>%
  filter(sample_id %in% '0a7bfd86-45c8-4959-9374-3f5166410c27') %>%
  arrange(sample_id)