# R3.4.4
# 2019年度 現代物理学II:統計熱力学 
# 成績=定期試験得点+レポート点
# 成績の分布と一般化線形回帰分析(成績~出席日数+欠席日数+遅刻回数)

library(lars)
## Loaded lars 1.2
library(dplyr)

seiseki<- read.csv ( "c:/RW/2019_seiseki.csv", header = TRUE, fileEncoding ="Shift-JIS")
seiseki %>% head()
##   レポート点 定期試験 総合点 出 欠 遅
## 1         15       36     51 11  4  0
## 2          0        0      0  5 10  1
## 3          0       46     46  9  7  0
## 4         20       56     76 12  4  0
## 5         20       47     67 16  0  0
## 6         15       55     70 16  0  0
seiseki %>% data.frame()->d.df

#data
rscore<-d.df[,1]   #レポート点
sscore<- d.df[,2]   #試験の得点
tscore<-d.df[,3]    #成績
syusseki<- d.df[,4] #出席日数
kesseki<- d.df[,5]  #欠席日数
chikoku<- d.df[,6]  #遅刻回数
gsyusseki<-syusseki+chikoku #遅刻を出席日数にする

#mean
print("平均点")
## [1] "平均点"
(tscore %>% mean() %>% round(1))
## [1] 71.8
#sd
print("標準偏差")
## [1] "標準偏差"
(tscore %>% sd() %>% round(1))
## [1] 21.6
#相関係数マトリックス
#dev.new()
psych::pairs.panels(d.df[,2:6], cex= 1)

#histgram
tscore %>% hist(xlim = c(40, 100), freq = TRUE)
abline(v=c(mean(tscore)-sd(tscore), mean(tscore), 
           mean(tscore)+sd(tscore)), col=c(1,2,1), lty=c(2,2,2))

#density plot
plot(tscore %>% density())
abline(v=c(mean(tscore)-sd(tscore), mean(tscore), 
           mean(tscore)+sd(tscore)), col=c(1,2,1), lty=c(2,2,2))

#Syusseki glm analysis
result.glm<- glm(tscore~syusseki-1, data=d.df, family=gaussian)
result2.glm<- glm(tscore~gsyusseki-1, data=d.df, family=gaussian)

result.glm %>% summary()
## 
## Call:
## glm(formula = tscore ~ syusseki - 1, family = gaussian, data = d.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -33.628  -10.819   -0.010    9.529   37.608  
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## syusseki   5.5392     0.1086   51.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 198.0122)
## 
##     Null deviance: 534088  on 95  degrees of freedom
## Residual deviance:  18613  on 94  degrees of freedom
## AIC: 774.98
## 
## Number of Fisher Scoring iterations: 2
result2.glm %>% summary()
## 
## Call:
## glm(formula = tscore ~ gsyusseki - 1, family = gaussian, data = d.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -32.419   -9.539   -0.644    9.453   38.969  
## 
## Coefficients:
##           Estimate Std. Error t value Pr(>|t|)    
## gsyusseki   5.4031     0.1013   53.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 181.8624)
## 
##     Null deviance: 534088  on 95  degrees of freedom
## Residual deviance:  17095  on 94  degrees of freedom
## AIC: 766.9
## 
## Number of Fisher Scoring iterations: 2
result.glm %>% residuals(type="response") %>% sd() %>% round(2)->SD1
result2.glm %>% residuals(type="response")%>% sd() %>% round(2)->SD2
SD1
## [1] 14.07
SD2
## [1] 13.49
#dev.new()
par(mfrow=c(2,2))
result.glm %>% plot()

#dev.new()
par(mfrow=c(2,2))
result2.glm %>% plot()

#plot
#dev.new()
par(mfrow=c(2,2))
plot(syusseki, tscore, xlim=c(0, 16), ylim=c(0, 100), 
     xlab="出席日数", ylab="成 績", main="出席回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(result.glm, lty=1)
text(syusseki, tscore+2, cex=0.5)
legend("bottomright", legend=paste("sd=",SD1, sep=""))

plot(gsyusseki, tscore, xlim=c(0, 16), ylim=c(0, 100), 
     xlab="出席日数", ylab="成 績", main="出席+遅刻回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(result2.glm, lty=1)
text(gsyusseki, tscore+2, cex=0.5)
legend("bottomright", legend=paste("sd=",SD2, sep=""))

plot(tscore, predict(result.glm), xlim=c(0, 100), ylim=c(0, 100), 
     xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(result.glm)+2, cex=0.5)
residuals(result.glm, type="response") %>% sd() %>% round(2)->SD1
legend("topleft", legend=paste("sd=",SD1, sep=""))

plot(tscore, predict(result2.glm), xlim=c(0, 100), ylim=c(0, 100), 
     xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(result2.glm)+2, cex=0.5)
residuals(result2.glm, type="response") %>% sd() %>% round(2)->SD2
legend("topleft", legend=paste("sd=",SD2, sep=""))

#Kessseki glm
kresult.glm<- glm(tscore~kesseki, data=d.df, family=gaussian)

#chikoku glm
cresult.glm<- glm(tscore~chikoku, data=d.df, family=gaussian)

#plot
#dev.new()
par(mfrow=c(2,2))
plot(kesseki, tscore, xlim=c(0, 16), ylim=c(0, 100), 
     xlab="欠席日数", ylab="成 績", main="欠席回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(kresult.glm, lty=1)
text(kesseki, tscore+2, cex=0.5)
residuals(kresult.glm, type="response") %>% sd() %>% round(2)->SD3
#legend("bottomright", legend=paste("sd=",SD3, sep=""))

plot(chikoku, tscore, xlim=c(0, 5), ylim=c(0, 100), 
     xlab="遅刻日数", ylab="成 績", main="遅刻回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(cresult.glm, lty=1)
text(chikoku, tscore+2, cex=0.5)
residuals(cresult.glm, type="response") %>% sd() %>% round(2)->SD4
#legend("bottomright", legend=paste("sd=",SD4, sep=""))

plot(tscore, predict(kresult.glm), xlim=c(0, 100), ylim=c(0, 100), 
     xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(kresult.glm)+2, cex=0.5)
residuals(result.glm, type="response") %>% sd() %>% round(2)->SD3
legend("topleft", legend=paste("sd=",SD3, sep=""))

plot(tscore, predict(cresult.glm), xlim=c(0, 100), ylim=c(0, 100), 
     xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(cresult.glm)+2, cex=0.5)
residuals(result2.glm, type="response") %>% sd() %>% round(2)->SD4
legend("topleft", legend=paste("sd=",SD4, sep=""))

#all
aresult.glm<- glm(tscore~syusseki+kesseki+chikoku-1, data=d.df, family=gaussian)
aresult.glm %>% summary()
## 
## Call:
## glm(formula = tscore ~ syusseki + kesseki + chikoku - 1, family = gaussian, 
##     data = d.df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -32.478   -9.802   -0.840    9.550   38.593  
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## syusseki  5.40976    0.12424  43.542  < 2e-16 ***
## kesseki   0.05154    0.40585   0.127  0.89922    
## chikoku   4.91400    1.74014   2.824  0.00582 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 185.6406)
## 
##     Null deviance: 534088  on 95  degrees of freedom
## Residual deviance:  17079  on 92  degrees of freedom
## AIC: 770.81
## 
## Number of Fisher Scoring iterations: 2
#dev.new()
par(mfrow=c(1,2))
plot(tscore, predict(aresult.glm), xlim=c(0, 100), ylim=c(0, 100), 
     xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(aresult.glm)+2, cex=0.5)
residuals(aresult.glm, type="response") %>% sd() %>% round(2)->SD5
legend("topleft", legend=paste("sd=",SD5, sep=""))

plot(predict(aresult.glm), residuals(aresult.glm), xlim=c(0, 100), ylim=c(-50, 50), 
     xlab="推定成績", ylab="回帰残差", main="回帰残差図")
abline(h=c(-SD5, 0, SD5), lty=c(2,1,2))
abline(v=59, col=2)
text(predict(aresult.glm), residuals(aresult.glm)+2, cex=0.5)
residuals(aresult.glm, type="response") %>% sd() %>% round(2)->SD5
legend("topleft", legend=paste("sd=", SD5, sep=""))