1

このコードの修正を手伝ってくれる人はいますか? 昨年はスクリプトを問題なく使用していましたが、現在は URL への接続に問題があります。

どうすれば修正できますか?

私が欲しいのは、2015-12-01 から 2016-04-15 までの気象観測所「EKAH」(Tirstrup、オーフス空港、デンマーク) からデータを収集して整理することです。

    ############## 1) Run function --------------------

    # Download and tidy one day of observations for a Weather Underground station.
    #
    # Args:
    #   station: station identifier inserted into the URL path (e.g. 'EKAH').
    #   date:    a single Date; its month, day and year are inserted into the URL.
    #
    # Returns:
    #   A data.frame of observations sorted by its Time column, or NULL when the
    #   response contains 5 lines or fewer.
    #
    # Requires the RCurl package to be installed (RCurl::getURL is used for the
    # HTTPS request).
    wunder_station_daily <- function(station, date)
    {
      base_url <- 'https://www.wunderground.com/history/airport'

      # Example website: https://www.wunderground.com/history/airport/EKAH/2016/06/09/DailyHistory.html?&MR=1

      # parse date (month and day go through as.integer, so no leading zeros)
      m <- as.integer(format(date, '%m'))
      d <- as.integer(format(date, '%d'))
      y <- format(date, '%Y')

      # compose final url
      final_url <- paste0(base_url,
                          '/', station,
                          '/', y,
                          '/', m,
                          '/', d,
                          '/DailyHistory.html?&MR=1')

      # NOTE(review): the parsing below expects comma-separated text with a header
      # line and <br> filler lines; confirm that this URL still returns that
      # format rather than a full HTML page.

      # NOTE(review): ssl.verifypeer=0L disables TLS certificate verification;
      # kept as in the original, but consider enabling it.
      response <- RCurl::getURL(final_url, ssl.verifypeer=0L, followlocation=1L)

      # getURL() returns the whole body as ONE string, whereas the code below
      # works line by line (as readLines() would have delivered it); split the
      # body into lines first, otherwise the length check can never pass
      the_data <- unlist(strsplit(response, '\r?\n'))

      # only keep records with more than 5 rows of data
      if(length(the_data) <= 5)
      {
        return(NULL)
      }

      # remove the first and last lines
      the_data <- the_data[-c(1, length(the_data))]

      # remove odd numbers starting from 3 --> end
      the_data <- the_data[-seq(3, length(the_data), by=2)]

      # extract header and cleanup
      the_header <- the_data[1]
      the_header <- make.names(strsplit(the_header, ',')[[1]])

      # convert to CSV, without header
      tC <- textConnection(paste(the_data, collapse='\n'))
      # make sure the connection is released even if read.csv() fails
      on.exit(close(tC), add=TRUE)
      the_data <- read.csv(tC, as.is=TRUE, row.names=NULL, header=FALSE, skip=1)

      # remove the last column, created by trailing comma
      # (drop=FALSE keeps a data.frame even if only one column remains)
      the_data <- the_data[, -ncol(the_data), drop=FALSE]

      # assign column names
      names(the_data) <- the_header

      # convert Time column into properly encoded date time
      the_data$Time <- as.POSIXct(strptime(the_data$Time, format='%Y-%m-%d %H:%M:%S'))

      # remove UTC and software type columns
      the_data$DateUTC.br. <- NULL
      the_data$SoftwareType <- NULL

      # sort and fix rownames (seq_len is safe when there are zero rows)
      the_data <- the_data[order(the_data$Time), ]
      row.names(the_data) <- seq_len(nrow(the_data))

      # done
      the_data
    }


    ############## 2) Get data for a range of dates ------------------------------


    # Dates to download (inclusive), one request per day.
    date.range <- seq.Date(from=as.Date('2015-12-01'), to=as.Date('2015-12-04'), by='1 day')
    station <- 'EKAH'


    # loop over dates, and fetch data
    # lapply keeps exactly one list slot per date, even when a day yields NULL
    # (assigning NULL with l[[i]] <- NULL would delete the slot instead)
    l <- lapply(seq_along(date.range), function(i)
    {
      print(paste0("Fetching data: ", date.range[i]))
      # use the station variable defined above rather than a repeated literal
      wunder_station_daily(station, date.range[i])
    })

    # stack elements of list into DF, filling missing columns with NA
    # (requires the plyr package to be installed)
    d <- plyr::ldply(l)
4

1 に答える 1