# Clean-up of a raw sun table (sunrise, sunset, day length, solar noon) that
# was read from a website into an all-text data frame.
#
# Author: Gene
###############################################################################

#' Clean up a raw sun table
#'
#' Strips header/footer/note rows, assigns fixed column names, and converts
#' every column from text to a usable type.
#'
#' @param dat A data frame with exactly 8 columns and at least 5 rows. The
#'   name of the first column is taken as the location. The first four rows
#'   and the last row are discarded; the rows in between are data (plus
#'   optional rows whose first cell contains "Note:").
#' @return A data frame with columns `Date` (Date), `Sunrise`, `Sunset`,
#'   `Solar_Noon_Time` (POSIXlt, parsed in the session's local time zone),
#'   `Day_Length` and `Day_Length_Difference` (numeric, fraction of a day;
#'   the difference is signed), `Solar_Noon_Altitude` and `Distance_10e6_KM`
#'   (numeric). The location is attached as attribute `"location"`.
CleanUpTable_simple <- function(dat) {
  stopifnot(is.data.frame(dat), ncol(dat) == 8L, nrow(dat) >= 5L)

  ## The location is carried in the name of the first column
  location <- colnames(dat)[1]

  ## Helper: the number directly in front of a unit letter ("h", "m", "s").
  ## A component that is absent from the text counts as 0, so "+ 45s" and
  ## "+ 2m 45s" are both read correctly; NA text stays NA.
  duration_part <- function(text, unit) {
    text <- as.character(text)
    hit <- regexpr(paste0("[0-9]+(?=", unit, ")"), text, perl = TRUE)
    found <- !is.na(hit) & hit > 0
    value <- rep(0, length(text))
    value[is.na(text)] <- NA
    value[found] <- as.numeric(regmatches(text, hit))
    value
  }

  ## Helper: drop the trailing one-character suffix and convert to numeric
  strip_last_char <- function(text) {
    text <- as.character(text)
    as.numeric(substr(text, 1, nchar(text) - 1))
  }

  ## Delete the four leading rows and the trailing row.
  ## seq_len() keeps this safe when no data rows remain.
  dat <- dat[-(1:4), , drop = FALSE]
  dat <- dat[seq_len(nrow(dat) - 1L), , drop = FALSE]

  ## Remove any rows that start with "Note" (these are about time changes).
  ## A logical mask is used because negative indexing with an empty match
  ## (dat[-integer(0), ]) would drop every row.
  dat <- dat[!grepl("Note:", dat[[1]]), , drop = FALSE]

  ## Restart row numbers
  rownames(dat) <- NULL

  ## Manually define column names
  colnames(dat) <- c(
    "Date", "Sunrise", "Sunset", "Day_Length", "Day_Length_Difference",
    "Solar_Noon_Time", "Solar_Noon_Altitude", "Distance_10e6_KM"
  )

  ## Convert the Date
  dat$Date <- as.Date(dat$Date, format = "%b %d, %Y")

  ## Time-of-day columns: replace "12:00 Noon" with "12:00 PM", prefix the
  ## row's date, and parse. Columns are assigned one at a time so the result
  ## does not depend on how sapply() would simplify; they remain POSIXlt.
  for (time_col in c("Sunrise", "Sunset", "Solar_Noon_Time")) {
    clock <- gsub("12:00 Noon", "12:00 PM", dat[[time_col]])
    dat[[time_col]] <- strptime(paste(dat$Date, clock), "%Y-%m-%d %I:%M %p")
  }

  ## Convert numeric data to numeric (each value carries a one-character
  ## suffix that has to be removed first)
  dat$Solar_Noon_Altitude <- strip_last_char(dat$Solar_Noon_Altitude)
  dat$Distance_10e6_KM <- strip_last_char(dat$Distance_10e6_KM)

  ## Convert day length ("<h>h <m>m <s>s") to a fraction of a day
  length_text <- dat$Day_Length
  dat$Day_Length <- duration_part(length_text, "h") / 24 +
    duration_part(length_text, "m") / 24 / 60 +
    duration_part(length_text, "s") / 24 / 60 / 60

  ## Convert day length difference to a signed fraction of a day.
  ## The "-" looking character from the website is not really a "-"
  ## (it is U+2212), so it is converted to a regular "-" first.
  diff_text <- gsub(intToUtf8(8722), "-", dat$Day_Length_Difference)
  ## "< 1s" is treated as no change
  diff_text <- gsub("< 1s", "+ 0m 0s", diff_text)

  sign_mult <- rep(1, length(diff_text))
  sign_mult[which(substr(diff_text, 1, 1) == "-")] <- -1

  dat$Day_Length_Difference <- (
    duration_part(diff_text, "m") / 24 / 60 +
      duration_part(diff_text, "s") / 24 / 60 / 60
  ) * sign_mult

  ## Attach location to data as an attribute (set last, after all reshaping)
  attr(dat, "location") <- location

  dat
}