B As-is R Files

교수님께서 주신 원본 R 파일 입니다.

B.1 Lecture 3

#################################################
##---------------------------------------------##
##                  Graphics                   ##
##---------------------------------------------##
#################################################

# 상위수준 그림 함수는 그림을 생성한다.
# 하위수준 그림 함수는 기존의 그림에 그림을 추가한다.

## 상위수준 그림 함수의 주요 인자 (arguments) ###

# main : 제목
# xlab/ylab : x축 및 y축 레이블
# xlim/ylim : x축 및 y축 범위
# col : 색깔
# lty : 선 모양
# pch : 점 모양
# cex : 그림 성분의 크기
# lwd : 선 굵기
# type : 그림 타입


#################################################
##########     상위수준 그림 함수    ############
#################################################

WD <- "D:\\AMC\\Education\\UU\\2017\\R\\Graphics\\"

setwd(WD)

dta <- read.csv("PK.csv")
head(dta)
str(dta)

################ scatter plot ###################

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0])

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], log="y")

plot(dta$TIME[dta$MDV==0], log(dta$DV[dta$MDV==0]))

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0]
    , xlab="Time (hr)", ylab="Concentration (ng/mL)" 
    , type="o", pch=2, col=1, main="PK time-course of Drug X"
    , xlim =c(-2,218), ylim=c(0,80))

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], axes=F,
    , xlab="Time (hr)", ylab="Concentration (ng/mL)" 
    , type="o", pch=2, col=1, main="PK time-course of Drug X"
    , xlim =c(-2,218), ylim=c(0,80))
axis(1, at=seq(0, 218, 24))
axis(2)
box()



################## Histogram ####################

d.demog <- read.csv("DEMOG.csv")

# histogram
hist(d.demog$HT)

hist(d.demog$HT, breaks=10)
hist(d.demog$HT, nclass=10)

# with density line
hist (d.demog$HT, probability=TRUE, breaks=10)
lines(density(d.demog$HT))


hist (d.demog$HT, probability=TRUE, breaks=9, xaxt="n"
      , main="Histogram for Height", xlab="Height (cm)", ylab="Probability (%)")
axis(1, at=seq(min(d.demog$HT), max(d.demog$HT), 3))
lines(density(d.demog$HT))


hist (d.demog$HT, probability=TRUE, breaks=9, xaxt="n"
      , main="Histogram for Height", xlab="Height (cm)", ylab="Probability (%)"
      , col = "lightblue", border = "pink")
axis(1, at=seq(min(d.demog$HT), max(d.demog$HT), 3))
lines(density(d.demog$HT))


############## Box-Whisker Plot #################

# Box-and-Whisker Plot

boxplot(d.demog$WT)

boxplot(d.demog$WT ~ d.demog$SEX)

boxplot(split(d.demog$WT, d.demog$SEX))

boxplot(WT ~ SEX, data=d.demog)

boxplot(d.demog$WT ~ d.demog$SEX
        , names=c("Male","Female"), ylab="AGE, year", ylim=c(min(d.demog$WT)-2, max(d.demog$WT)+2)
            , col="pink")

boxplot(d.demog$WT ~ d.demog$SEX
        , names=c("Male","Female"), ylab="AGE, year", ylim=c(min(d.demog$WT)-2, max(d.demog$WT)+2)
            , col=c("lightblue", "salmon"), width=c(0.6, 1))

#varwidth: if varwidth is TRUE, the boxes are drawn with widths proportional  
#to the square-roots of the number of observations in the groups.

boxplot(d.demog$WT ~ d.demog$SEX
        , names=c("Male","Female"), ylab="AGE, year", ylim=c(min(d.demog$WT)-2, max(d.demog$WT)+2)
            , col=c("lightblue", "salmon")
            , varwidth=TRUE)



################## Bar Plot #####################

barplot(d.demog$HT)

VADeaths

barplot(VADeaths, border = "dark blue")

barplot(VADeaths, col = rainbow(20))

barplot(VADeaths, col = heat.colors(8))

barplot(VADeaths, col = gray.colors(4))

barplot(VADeaths, col = gray.colors(4), log="x")
barplot(VADeaths, col = gray.colors(4), log="y")
barplot(VADeaths, col = gray.colors(4), log="xy")



################## pie chart ####################

drug.X.market <- c(0.12, 0.29, 0.32, 0.22, 0.11, 0.28)
names(drug.X.market) <- c("South Korea","China","USA","Japan","Austria","EU")
pie(drug.X.market)


################ matplot 함수 ###################

# matrix와 column 사이의 그림

pct.95 <- read.csv("pct95.csv")
matplot(pct.95[,1], pct.95[,2:ncol(pct.95)], pch=1)

matplot(pct.95[,1], pct.95[,2:ncol(pct.95)], pch=1, col=c(1,2,1), type="l", lty=1, lwd=c(1,2,1))



###### Scatter plot matrices (pairs plots) ######

pairs(d.demog)

#add a loess smoother, type:
pairs(d.demog, panel = panel.smooth)

  panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
   {
       usr <- par("usr"); on.exit(par(usr))
       par(usr = c(0, 1, 0, 1))
       r = (cor(x, y))
       txt <- format(c(r, 0.123456789), digits=digits)[1]
       txt <- paste(prefix, txt, sep="")
       if(missing(cex.cor)) cex <- 1.5
       text(0.5, 0.5, txt, cex = 1.5)
   }

pairs(d.demog, lower.panel=panel.smooth, upper.panel=panel.cor) 



#################################################
##             하위수준 그림 함수              ##
#################################################

# points : 점추가
# lines : 선 추가
# abline : 기준선 추가
# mtext : 텍스트 추가
# legend : 설명(legend) 추가
# polygon : polygon 추가


############ 점, 선, 설명 추가 하기 #############

plot(pct.95$TIME, pct.95$PCT50, main="PK of Drug X"
     , type="l", xlab="Time (h)", ylab="Concentration (ng/ml)"
     , ylim=range(0,80), lty=1, col="red", lwd=2)


plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], main="PK of Drug X"
     , type="n", xlab="Time (h)", ylab="Concentration (ng/ml)"
     , ylim=range(0,80))
points(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], pch = 16, cex=0.8)
lines(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], col="black", lwd=1)
abline(40, 0, col="red", lty=2)                               #abline(a,b): y=a+b*x
legend("topright", legend=c("Individual concentrations")
       , lty=1, col="black")


################# polygon 함수 ###################

plot(c(1, 10), c(1, 6), type = "n")
polygon(c(2,8,8,2), c(5,4,3,2), col="lightgreen")


plot(c(1, 9), 1:2, type = "n")
polygon(1:9, c(2,1,2,1,1,2,1,2,1),
        col = c("red", "blue"),
        border = c("green", "yellow"),
        lwd = 3, lty = c("dashed", "solid"))


################# 그림 출력하기 ##################

#--pdf graphics devices 
pdf("PK_of_Drug_X.pdf")

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], main="PK of Drug X"
     , type="n", xlab="Time (h)", ylab="Concentration (ng/ml)"
     , ylim=range(0,80))
points(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], pch = 16, cex=0.8)
lines(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], col="black", lwd=1)
abline(40, 0, col="red", lty=2)                               #abline(a,b): y=a+b*x
legend("topright", legend=c("Individual concentrations")
       , lty=1, col="black")

dev.off()


#--PNG graphics devices
png("PK_of_Drug_X.png")

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], main="PK of Drug X"
     , type="n", xlab="Time (h)", ylab="Concentration (ng/ml)"
     , ylim=range(0,80))
points(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], pch = 16, cex=0.8)
lines(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], col="black", lwd=1)
abline(40, 0, col="red", lty=2)                               #abline(a,b): y=a+b*x
legend("topright", legend=c("Individual concentrations")
       , lty=1, col="black")
       
dev.off()



#--Windows graphics devices 
win.metafile("PK_of_Drug_X.wmf")

plot(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], main="PK of Drug X"
     , type="n", xlab="Time (h)", ylab="Concentration (ng/ml)"
     , ylim=range(0,80))
points(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], pch = 16, cex=0.8)
lines(dta$TIME[dta$MDV==0], dta$DV[dta$MDV==0], col="black", lwd=1)
abline(40, 0, col="red", lty=2)                               #abline(a,b): y=a+b*x
legend("topright", legend=c("Individual concentrations")
       , lty=1, col="black")
       
dev.off()

B.2 Lecture 4

# 2017-03-29

setwd("D:/Rt")
dir()

mydata = read.csv("MyData2017.csv", as.is=TRUE)

Theoph
library(lattice) # trellis

xyplot(conc ~ Time | Subject, data=Theoph)

xyplot(conc ~ Time | Subject, data=Theoph, type="b")

Theoph[,"ID"] = as.numeric(as.character(Theoph[,"Subject"]))

xyplot(conc ~ Time | ID, data=Theoph, type="b")

xyplot(conc ~ Time | as.factor(ID), data=Theoph, type="b")

write.csv(Theoph, "Theoph.csv", row.names=FALSE, quote=FALSE, na="")


IDs = sort(unique(Theoph[,"ID"])) ; IDs
nID = length(IDs) ; nID

demog = unique(Theoph[,c("ID","Wt")])
colnames(demog) = c("ID", "BWT")
write.csv(demog, "1-demog.csv", row.names=FALSE, quote=FALSE, na="")

DV = Theoph[,c("ID","Time", "conc")]
colnames(DV) = c("ID", "TIME", "DV")
write.csv(DV, "3-DV.csv", row.names=FALSE, quote=FALSE, na="")

adm = cbind(IDs, rep(0, nID), rep(320, nID))
colnames(adm) = c("ID", "TIME", "AMT")
write.csv(adm, "2-adm.csv", row.names=FALSE, quote=FALSE, na="")


demog = read.csv("1-demog.csv", as.is=TRUE)
adm = read.csv("2-adm.csv", as.is=TRUE)
dv = read.csv("3-dv.csv", as.is=TRUE)

AdmDv = merge(adm, dv, by=intersect(colnames(adm), colnames(dv)), all=TRUE)

DataAll = merge(demog, AdmDv, by=c("ID"), all=TRUE)

B.3 Lecture 5

# 2017-04-05 R-intro.pdf Chapter 08

pois
# ?dbeta
dnorm(0)
pnorm(0)
1 - pnorm(1.96)
# ?pnorm
pnorm(1.96, lower.tail=FALSE)
qnorm(0.5)
qnorm(0.975)
format(qnorm(0.975), digits=22)
rnorm(5)
rnorm(5, 10, 1)
x = rnorm(100, 10, 1)
mean(x)
sd(x)

2*pt(-2.43, df = 13)

2*pt(-2.43, df = 1000)

qnorm(0.995)
qf(0.01, 2, 7, lower.tail = FALSE)

# ?fivenum
faithful
str(faithful)
eruptions
attach(faithful)
eruptions
waiting

stem(waiting)
sort(eruptions)

hist(eruptions)
hist(eruptions, seq(1.6, 5.2, 0.2), prob=TRUE)
lines(density(eruptions, bw=0.1))
rug(eruptions)
# ?hist
# ?density
lines(density(eruptions, bw="SJ"), lty=3)
plot(ecdf(eruptions), do.points=FALSE, verticals=TRUE)
# ?plot
ecdf(eruptions)
x = ecdf(eruptions)
x
str(x)
x()
plot(ecdf(eruptions), do.points=FALSE)
plot(ecdf(eruptions))
long <- eruptions[eruptions > 3]
x <- seq(3, 5.4, 0.01)
pnorm(x, mean=mean(long), sd=sqrt(var(long)))

# ?par
x <- rt(250, df = 5)
qqnorm(x); qqline(x)

curve(dnorm, -5, 5)
y = density(x)
lines(y, lty=3)
# ?ppoints
ppoints(250)
ppoints(10)

qqplot(qt(ppoints(250), df = 5), x, xlab = "Q-Q plot for t dsn")
windows()
qqplot(qt(runif(250), df = 5), x, xlab = "Q-Q plot for t dsn")
# ?shapiro.test
# ?ks.test
# ?t.test


A = c(79.98, 80.04, 80.02, 80.04, 80.03, 80.03, 80.04, 79.97, 80.05, 80.03, 80.02, 80.00, 80.02)
B = c(80.02, 79.94, 79.98, 79.97, 79.97, 80.03, 79.95, 79.97)
boxplot(A, B)
t.test(A, B)

var.test(A, B)
t.test(A, B, var.equal=TRUE)
wilcox.test(A, B)
plot(ecdf(A), do.points=FALSE, verticals=TRUE, xlim=range(A, B))
plot(ecdf(B), do.points=FALSE, verticals=TRUE, add=TRUE)
ks.test(A, B)

# Chapter 9 Grouping, loops and conditional execution

# { } does grouping
# Usefulness of loops: for >> while >> repeat
for (i in 1:10) {
  print(2*i)
}

for (i in 1:10) print(2*i)

#while (    ) {
## Statements
#}

# # if ~ else ~
# if (   ) {
# # Statements 1
# } else {
# # Statements 2
# }
# 
# if (    ) # Statement1
# else # Statement2
# 
# if (   ) {
# # Statements 1
# } else if (   ) {
# # Statements 2
# } else if (   ) {
# # Statements 3
# } else {
# # Statements 4  
# }

   
#
#

# Chapter 10 Writing your own functions

Square = function(x=0)
{
  return(x*x)
}

twosam = function(y1, y2) 
{
  n1  = length(y1)
  n2  = length(y2)
  yb1 = mean(y1)
  yb2 = mean(y2)
  s1  = var(y1)
  s2  = var(y2) 
  s   = ((n1 - 1)*s1 + (n2 - 1)*s2)/(n1 + n2 - 2)
  tst = (yb1 - yb2)/sqrt(s*(1/n1 + 1/n2))
  return (tst)
}

x = rnorm(10)
y = rt(10, 5)

twosam(x, y)

T.test = function(y1, y2) 
{
  n1  = length(y1)
  n2  = length(y2)
  yb1 = mean(y1)
  yb2 = mean(y2)
  s1  = var(y1)
  s2  = var(y2) 
  s   = ((n1 - 1)*s1 + (n2 - 1)*s2)/(n1 + n2 - 2)

  tst = (yb1 - yb2)/sqrt(s*(1/n1 + 1/n2))
  DF = n1 + n2 - 2
  p.val = 2*(1 - pt(abs(tst), df=DF))
  
  Res = list(tst, DF, p.val, yb1, yb2)
  names(Res) = c("t", "df", "p-value", "mean of x", "mean of y")
  
  return (Res)
}

res = T.test(x, y)
t.test(x, y)


bslash = function(X, y) 
{
  X = qr(X)
  return (qr.coef(X, y))
}

regcoeff = bslash(Xmat, yvar)


"%^%" = function(S, pow) with(eigen(S), vectors %*% (abs(values)^pow * t(vectors)))

M = matrix(c(2,1,1,2), nrow=2) ; M
M %^% 0.5
sqrtM = M%^%0.5 ; sqrtM
sqrtM %*% sqrtM





area = function(f, a, b, eps=1.0e-06, lim=10) 
{
  fun1 = function(f, a, b, fa, fb, a0, eps, lim, fun) 
  {
  ## function 'fun1’is only visible inside 'area’
    d = (a + b)/2
    h = (b - a)/4
    fd = f(d)
    a1 = h * (fa + fd)
    a2 = h * (fd + fb)
    if (abs(a0 - a1 - a2) < eps || lim == 0)
      return (a1 + a2)
    else {
      return (fun(f, a, d, fa, fd, a1, eps, lim - 1, fun) + fun(f, d, b, fd, fb, a2, eps, lim - 1, fun))
    }
  }
  fa = f(a)
  fb = f(b)
  a0 = ((fa + fb) * (b - a))/2
  fun1(f, a, b, fa, fb, a0, eps, lim, fun1)
} 

area(dnorm, 0, 1)
integrate(dnorm, 0, 1)
pnorm(1) - pnorm(0)


f = function(x) 
{
  y = 2*x
  print(x)
  print(y)
  print(z)
}

f(1)
z = 3
f(1)


cube = function(n) {
  sq = function() n*n
  n*sq()
}

cube(5)

open.account = function(total) 
{
  list(
    deposit = function(amount) 
    {
      if(amount <= 0)
      stop("Deposits must be positive!\n")
      total <<- total + amount
      cat(amount, "deposited. Your balance is", total, "\n\n")
    },
    withdraw = function(amount) 
    {
      if(amount > total)
      stop("You don’t have that much money!\n")
      total <<- total - amount
      cat(amount, "withdrawn. Your balance is", total, "\n\n")
    },
    balance = function() 
    {
      cat("Your balance is", total, "\n\n")
    }
  )
}

ross = open.account(100)
robert = open.account(200)

ross$balance()
robert$balance()
ross$deposit(50)
ross$balance()
ross$withdraw(500)



# More basic keywords and functions
1 %in% c(1,2,3,4)
5 %in% c(1,2,3,4)
is.finite(Inf)
prod(1:3)
cummax(1:10)
cummax(10:1)
# ?xor
x = 11:20
x
which(x==3)
which(x==13)

length(x)
y = "my string"
length(y)
nchar(y)
strsplit(y, " ")
strsplit(y, " ")[[1]]
substr(y, 4, 5)


sample(1:10)
sample(1:10, 20)
sample(1:10, 20, replace=TRUE)
sample(rep(1:10,2))