Rでのデータ抽出用スクリプト。weighted sortを模した計算式を含めた。使いかたはこちらの関連記事で。
library(RCurl) library(XML)
# Log in through Google ClientLogin and return the Analytics auth token.
#
# Args:
#   email: account e-mail address, sent as the "Email" form field.
#   pw:    account password, sent as the "Passwd" form field.
#
# Returns:
#   The value of the "Auth=" line of the ClientLogin response, as a single
#   string without the "Auth=" prefix or any line terminator.
#
# Raises:
#   An error when the response has no "Auth=" line (for example when the
#   service answers with an "Error=..." body), instead of handing the raw
#   response back to the caller as if it were a token.
getAuth <- function(email, pw){
  urlAc <- "https://www.google.com/accounts/ClientLogin"
  rs <- postForm(urlAc,
                 .params = list(accountType="GOOGLE",
                                Email=email,
                                Passwd=pw,
                                service="analytics",
                                source="rganalytics"))

  # The response is a set of KEY=value lines; pick the Auth line explicitly so
  # that "\r\n" line endings or extra trailing lines cannot leak into the token.
  body <- paste(as.character(rs), collapse="\n")
  auth.line <- regmatches(body, regexpr("(^|\n)Auth=[^\r\n]+", body))
  if(length(auth.line) == 0){
    stop("ClientLogin response did not contain an Auth token", call.=FALSE)
  }
  sub("^\n?Auth=", "", auth.line)
}
# List every Analytics profile visible to the token, joined with the name of
# the account that owns it.
#
# Args:
#   auth: ClientLogin token; sent as "GoogleLogin Auth=<auth>".
#
# Returns:
#   A data frame with the columns ac.ids, ac.names, pr.ids and pr.names
#   (accounts merged with profiles on the account id). One profile per account
#   is also printed as a quick overview.
getAccounts <- function(auth){
  feed.ns <- c(atom='http://www.w3.org/2005/Atom',
               dxp='http://schemas.google.com/analytics/2009')
  auth.plus <- paste("GoogleLogin Auth=", auth, sep="")

  # Download one management feed and hand back the parsed XML document.
  download.feed <- function(feed.url){
    gatherer <- basicTextGatherer()
    curlPerform(url=feed.url,
                .opts=curlOptions(httpheader=c(Authorization=auth.plus)),
                writefunction=gatherer$update)
    xmlParse(gatherer$value())
  }

  # "value" attribute of the named dxp:property of every entry in the feed.
  property.values <- function(feed, property){
    xpath <- paste("//atom:entry/dxp:property[@name='ga:", property, "']", sep="")
    xpathSApply(feed, xpath, quote(xmlGetAttr(x,'value')), namespaces=feed.ns)
  }

  # All profiles of all web properties of all accounts.
  pr.xml <- download.feed("https://www.google.com/analytics/feeds/datasources/ga/accounts/~all/webproperties/~all/profiles")
  pr.dt <- data.frame(pr.ids=property.values(pr.xml, "profileId"),
                      pr.names=property.values(pr.xml, "profileName"),
                      pr.ac.ids=property.values(pr.xml, "accountId"),
                      stringsAsFactors=FALSE)
  print("get profiles and now getting accounts data")

  # All accounts.
  ac.xml <- download.feed("https://www.google.com/analytics/feeds/datasources/ga/accounts")
  ac.names <- property.values(ac.xml, "accountName")
  ac.ids <- property.values(ac.xml, "accountId")
  ac.dt <- data.frame(ac.ids, ac.names, stringsAsFactors=FALSE)

  print(paste("accounts", length(ac.ids), " ", "profiles", length(pr.dt$pr.ids)))
  print("アカウント毎に一つずつプロファイル表示")

  rt <- merge(ac.dt, pr.dt, by.x="ac.ids", by.y="pr.ac.ids")
  # Overview: for each account show the profile whose id is the minimum.
  first.per.account <- tapply(rt$pr.ids, rt$ac.ids, min)
  print(rt[rt$pr.ids %in% first.per.account, ])
  rt
}

# Example of a raw Data Export API request URL (visits by date and hour).
myurl <- "https://www.google.com/analytics/feeds/data?ids=ga%3A30547051&metrics=ga%3Avisits&dimensions=ga%3Adate%2Cga%3Ahour&start-date=2010-09-13&end-date=2010-09-27&max-results=1000"
# Query the Google Analytics Data Export API data feed and return the rows as
# a data frame.
#
# Args:
#   auth:        ClientLogin token; sent as "GoogleLogin Auth=<auth>".
#   id:          profile id; sent as ids=ga:<id>.
#   dimensions:  character vector of dimension names WITHOUT the "ga:" prefix,
#                or NA for none.
#   metrics:     character vector of metric names WITHOUT the "ga:" prefix.
#                At least one metric is required.
#   segment:     value for the segment= parameter, passed through as-is.
#   filters:     filter expression; "ga:" is prepended once to its front.
#   sort:        value for the sort= parameter, passed through as-is.
#   start.date,
#   end.date:    "YYYY-MM-DD" strings; default to the last 30 days up to today.
#   start.index: first row to request (start-index=).
#   max.results: page size to request (max-results=).
#
# Returns:
#   A data frame with one column per dimension followed by one column per
#   metric. Metric columns are numeric, a "date" column is converted to Date,
#   and rows are ordered by the first metric, largest first. When the feed
#   reports more rows than the first page holds, the remaining pages are
#   fetched in chunks of 10000 and appended. Returns NULL (after printing a
#   message) when no metric is given.
getData <- function(
  auth, id,
  dimensions=NA, metrics=NA, segment=NA, filters=NA, sort=NA,
  start.date=strftime((ga.today-30),"%Y-%m-%d"),
  end.date=strftime(ga.today,"%Y-%m-%d"),
  start.index=NA, max.results=NA){

  # A metric is mandatory: stop here, before any request is built or sent.
  if(is.na(metrics[1])){
    print("at least a metric")
    return(NULL)
  }

  # The default expressions of start.date / end.date are evaluated lazily in
  # this frame and refer to ga.today, so it must exist before they are used.
  ga.today <- Sys.Date()
  has.dimensions <- !is.na(dimensions[1])
  feed.ns <- c(atom='http://www.w3.org/2005/Atom',
               openSearch="http://a9.com/-/spec/opensearchrss/1.0/",
               dxp='http://schemas.google.com/analytics/2009')

  # Render a value for the query string; numbers must never come out in
  # scientific notation (as.character(100000) is "1e+05").
  plain <- function(value){
    if(is.numeric(value)) format(value, scientific=FALSE, trim=TRUE) else as.character(value)
  }
  # "key=<prefix><value>", or NULL (dropped by c()) when the value is not set.
  optional <- function(key, value, prefix=""){
    if(is.na(value[1])) return(NULL)
    paste(key, "=", prefix, plain(value), sep="")
  }
  # "key=ga:a,ga:b,..." for dimension / metric name lists.
  ga.names <- function(key, values){
    paste(key, "=", paste("ga:", values, sep="", collapse=","), sep="")
  }

  pms <- c(
    paste("ids=ga:", plain(id), sep=""),
    if(has.dimensions) ga.names("dimensions", dimensions),
    ga.names("metrics", metrics),
    optional("segment", segment),
    optional("filters", filters, prefix="ga:"),
    optional("sort", sort),
    paste("start-date=", start.date, sep=""),
    paste("end-date=", end.date, sep=""),
    optional("start-index", start.index),
    optional("max-results", max.results))
  url <- paste("https://www.google.com/analytics/feeds/data?",
               paste(pms, collapse="&"), sep="")
  print(url)

  p.auth <- paste("GoogleLogin Auth=", auth, sep="")
  h <- basicTextGatherer()
  curlPerform(url=url,
              .opts=curlOptions(httpheader=c(Authorization=p.auth)),
              writefunction=h$update)
  doc <- xmlParse(h$value())

  # Text of one openSearch:* element of the feed header.
  feed.text <- function(name){
    nodes <- getNodeSet(doc,
                        paste("//atom:feed/openSearch:", name, "/text()", sep=""),
                        namespaces=feed.ns)
    if(length(nodes) == 0){
      stop("unexpected Data Export API response: openSearch:", name,
           " is missing", call.=FALSE)
    }
    xmlValue(nodes[[1]])
  }
  totalResults <- feed.text("totalResults")
  itemsPerPage <- feed.text("itemsPerPage")
  startIndex <- feed.text("startIndex")
  n.total <- as.numeric(totalResults)
  n.page <- as.numeric(itemsPerPage)
  print(paste("get(", startIndex, "-", (as.numeric(startIndex) - 1 + n.page),
              ") in ", totalResults, sep=""))

  # "value" attributes of one dimension/metric across all entries. An empty
  # node set becomes character(0) so that an empty feed still yields columns.
  entry.column <- function(kind, name){
    path <- paste("//atom:entry/dxp:", kind, "[@name='ga:", name, "']", sep="")
    as.character(unlist(
      xpathSApply(doc, path, xmlGetAttr, "value", namespaces=feed.ns)))
  }
  columns <- lapply(metrics, function(m) entry.column("metric", m))
  col.names <- c(metrics, recursive=TRUE)
  if(has.dimensions){
    columns <- c(lapply(dimensions, function(d) entry.column("dimension", d)),
                 columns)
    col.names <- c(dimensions, metrics, recursive=TRUE)
  }
  dt <- as.data.frame(columns, stringsAsFactors=FALSE)
  colnames(dt) <- col.names

  dt[metrics] <- lapply(dt[metrics], as.numeric)
  print(paste(colnames(dt), collapse=","))
  if("date" %in% colnames(dt)) dt$date <- as.Date(dt$date, "%Y%m%d")

  # Only the first page triggers paging; the counts are compared as numbers
  # (as strings, "3" > "1000" would be TRUE).
  if(startIndex == "1" && n.total > n.page){
    print("残りデータを取得")
    starts <- seq(n.page + 1, n.total, by=10000)
    rest <- lapply(starts, function(i){
      getData(auth=auth, id=id, metrics=metrics, dimensions=dimensions,
              filters=filters, sort=sort, segment=segment,
              start.index=plain(i),
              start.date=start.date, end.date=end.date,
              max.results=10000)
    })
    dt <- do.call(rbind, c(list(dt), rest))
  }

  # Largest first metric on top; drop=FALSE keeps a one-column result a
  # data frame.
  dt <- dt[rev(order(dt[[metrics[1]]])), , drop=FALSE]
  print(head(dt,5))
  dt
}
# For weighted-mean calculation
weighted.mean <- function(data, elm, denom, prior.mean=NULL, prior.num=NULL, cname=NULL){ if(is.null(prior.num)){ prior.num = max(data[,denom]) } if(is.null(prior.mean)){ prior.mean <- sum(data[, elm])/sum(data[, denom]) } print((prior.mean)*100) cname <- ifelse(is.null(cname),"w.rate", cname) data <- transform(data, #w.rate.asis=(data[,elm]/data[,denom]), tmp = (ifelse(data[,denom]==0, prior.mean, (data[,denom]/prior.num)*(data[,elm]/data[,denom]) + (1 - data[,denom]/prior.num)*(prior.mean) )
#以下は実行には直接いらない。データの項目を忘れた時用のもの
# Reference tables only: nothing below is needed to run the functions in this
# file. They list dimension and metric names (without the "ga:" prefix), grouped
# by category, as a reminder of what can be passed to getData().
ga.dimensions <- list(
  visitor=c("browser","city","country","date","day","daysSinceLastVisit",
            "hostname","month","pageDepth","region","visitCount","visitLength",
            "visitorType","week","year"),
  campaign=c("adContent","adGroup","adSlot","adSlotPosition","campaign",
             "keyword","medium","referralPath","source"),
  content=c("pagePath","exitPagePath","landingPagePath","nextPagePath",
            "pageTitle","previousPagePath"),
  ecommerce=c("affiliation","daysToTransaction","productCategory",
              "productName","productSku","transactionId","visitsToTransaction"),
  internal.search=c("searchCategory","searchDestination","searchKeyword",
                    "searchKeywordRefinement","searchUsed"),
  custom.variable=c("customVarName1","customVarValue1"),
  events=c("eventCategory","eventAction","eventLabel")
)

ga.metrics <- list(
  visitor=c("bounces","entrances","exits","newVisits","pageviews","timeOnPage",
            "timeOnSite","visitors","visits"),
  campaign=c("adClicks","adCost","CPC","CPM","CTR","impressions"),
  content=c("uniquePageviews"),
  ecommerce=c("itemRevenue","itemQuantity","transactions",
              "transactionShipping","transactionTax","uniquePurchases"),
  # NOTE: the key is spelled "internel.search" (not "internal.search" as in
  # ga.dimensions); it is kept as-is so existing lookups by name keep working.
  internel.search=c("searchDepth","searchDuration","searchExits",
                    "searchRefinements","searchUniques","searchVisits"),
  goals=c("goal1Completions","goalCompletionsAll","goal1Starts","goal1Value",
          "goalValueAll"),
  events=c("totalEvents","uniqueEvents","eventValue")
)
z_API > DataExport API >