# R3.4.4
# 2019年度 現代物理学II:統計熱力学
# 成績=定期試験得点+レポート点
# 成績の分布と一般化線形回帰分析(成績~出席日数+欠席日数+遅刻回数)
library(lars)
## Loaded lars 1.2
library(dplyr)
seiseki<- read.csv ( "c:/RW/2019_seiseki.csv", header = TRUE, fileEncoding ="Shift-JIS")
seiseki %>% head()
## レポート点 定期試験 総合点 出 欠 遅
## 1 15 36 51 11 4 0
## 2 0 0 0 5 10 1
## 3 0 46 46 9 7 0
## 4 20 56 76 12 4 0
## 5 20 47 67 16 0 0
## 6 15 55 70 16 0 0
seiseki %>% data.frame()->d.df
#data
rscore<-d.df[,1] #レポート点
sscore<- d.df[,2] #試験の得点
tscore<-d.df[,3] #成績
syusseki<- d.df[,4] #出席日数
kesseki<- d.df[,5] #欠席日数
chikoku<- d.df[,6] #遅刻回数
gsyusseki<-syusseki+chikoku #遅刻を出席日数にする
#mean
print("平均点")
## [1] "平均点"
(tscore %>% mean() %>% round(1))
## [1] 71.8
#sd
print("標準偏差")
## [1] "標準偏差"
(tscore %>% sd() %>% round(1))
## [1] 21.6
#相関係数マトリックス
#dev.new()
psych::pairs.panels(d.df[,2:6], cex= 1)
#histgram
tscore %>% hist(xlim = c(40, 100), freq = TRUE)
abline(v=c(mean(tscore)-sd(tscore), mean(tscore),
mean(tscore)+sd(tscore)), col=c(1,2,1), lty=c(2,2,2))
#density plot
plot(tscore %>% density())
abline(v=c(mean(tscore)-sd(tscore), mean(tscore),
mean(tscore)+sd(tscore)), col=c(1,2,1), lty=c(2,2,2))
#Syusseki glm analysis
result.glm<- glm(tscore~syusseki-1, data=d.df, family=gaussian)
result2.glm<- glm(tscore~gsyusseki-1, data=d.df, family=gaussian)
result.glm %>% summary()
##
## Call:
## glm(formula = tscore ~ syusseki - 1, family = gaussian, data = d.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -33.628 -10.819 -0.010 9.529 37.608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## syusseki 5.5392 0.1086 51.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 198.0122)
##
## Null deviance: 534088 on 95 degrees of freedom
## Residual deviance: 18613 on 94 degrees of freedom
## AIC: 774.98
##
## Number of Fisher Scoring iterations: 2
result2.glm %>% summary()
##
## Call:
## glm(formula = tscore ~ gsyusseki - 1, family = gaussian, data = d.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -32.419 -9.539 -0.644 9.453 38.969
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## gsyusseki 5.4031 0.1013 53.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 181.8624)
##
## Null deviance: 534088 on 95 degrees of freedom
## Residual deviance: 17095 on 94 degrees of freedom
## AIC: 766.9
##
## Number of Fisher Scoring iterations: 2
result.glm %>% residuals(type="response") %>% sd() %>% round(2)->SD1
result2.glm %>% residuals(type="response")%>% sd() %>% round(2)->SD2
SD1
## [1] 14.07
SD2
## [1] 13.49
#dev.new()
par(mfrow=c(2,2))
result.glm %>% plot()
#dev.new()
par(mfrow=c(2,2))
result2.glm %>% plot()
#plot
#dev.new()
par(mfrow=c(2,2))
plot(syusseki, tscore, xlim=c(0, 16), ylim=c(0, 100),
xlab="出席日数", ylab="成 績", main="出席回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(result.glm, lty=1)
text(syusseki, tscore+2, cex=0.5)
legend("bottomright", legend=paste("sd=",SD1, sep=""))
plot(gsyusseki, tscore, xlim=c(0, 16), ylim=c(0, 100),
xlab="出席日数", ylab="成 績", main="出席+遅刻回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(result2.glm, lty=1)
text(gsyusseki, tscore+2, cex=0.5)
legend("bottomright", legend=paste("sd=",SD2, sep=""))
plot(tscore, predict(result.glm), xlim=c(0, 100), ylim=c(0, 100),
xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(result.glm)+2, cex=0.5)
residuals(result.glm, type="response") %>% sd() %>% round(2)->SD1
legend("topleft", legend=paste("sd=",SD1, sep=""))
plot(tscore, predict(result2.glm), xlim=c(0, 100), ylim=c(0, 100),
xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(result2.glm)+2, cex=0.5)
residuals(result2.glm, type="response") %>% sd() %>% round(2)->SD2
legend("topleft", legend=paste("sd=",SD2, sep=""))
#Kessseki glm
kresult.glm<- glm(tscore~kesseki, data=d.df, family=gaussian)
#chikoku glm
cresult.glm<- glm(tscore~chikoku, data=d.df, family=gaussian)
#plot
#dev.new()
par(mfrow=c(2,2))
plot(kesseki, tscore, xlim=c(0, 16), ylim=c(0, 100),
xlab="欠席日数", ylab="成 績", main="欠席回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(kresult.glm, lty=1)
text(kesseki, tscore+2, cex=0.5)
residuals(kresult.glm, type="response") %>% sd() %>% round(2)->SD3
#legend("bottomright", legend=paste("sd=",SD3, sep=""))
plot(chikoku, tscore, xlim=c(0, 5), ylim=c(0, 100),
xlab="遅刻日数", ylab="成 績", main="遅刻回数")
abline(h=c(59, 69, 79, 89, 100), col=c(2, 1, 1, 1, 1))
abline(cresult.glm, lty=1)
text(chikoku, tscore+2, cex=0.5)
residuals(cresult.glm, type="response") %>% sd() %>% round(2)->SD4
#legend("bottomright", legend=paste("sd=",SD4, sep=""))
plot(tscore, predict(kresult.glm), xlim=c(0, 100), ylim=c(0, 100),
xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(kresult.glm)+2, cex=0.5)
residuals(result.glm, type="response") %>% sd() %>% round(2)->SD3
legend("topleft", legend=paste("sd=",SD3, sep=""))
plot(tscore, predict(cresult.glm), xlim=c(0, 100), ylim=c(0, 100),
xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(cresult.glm)+2, cex=0.5)
residuals(result2.glm, type="response") %>% sd() %>% round(2)->SD4
legend("topleft", legend=paste("sd=",SD4, sep=""))
#all
aresult.glm<- glm(tscore~syusseki+kesseki+chikoku-1, data=d.df, family=gaussian)
aresult.glm %>% summary()
##
## Call:
## glm(formula = tscore ~ syusseki + kesseki + chikoku - 1, family = gaussian,
## data = d.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -32.478 -9.802 -0.840 9.550 38.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## syusseki 5.40976 0.12424 43.542 < 2e-16 ***
## kesseki 0.05154 0.40585 0.127 0.89922
## chikoku 4.91400 1.74014 2.824 0.00582 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 185.6406)
##
## Null deviance: 534088 on 95 degrees of freedom
## Residual deviance: 17079 on 92 degrees of freedom
## AIC: 770.81
##
## Number of Fisher Scoring iterations: 2
#dev.new()
par(mfrow=c(1,2))
plot(tscore, predict(aresult.glm), xlim=c(0, 100), ylim=c(0, 100),
xlab="成 績", ylab="推定成績", main="回帰相関図")
abline(h=59, col=2)
abline(v=59, col=2)
abline(0,1, lty=1)
text(tscore, predict(aresult.glm)+2, cex=0.5)
residuals(aresult.glm, type="response") %>% sd() %>% round(2)->SD5
legend("topleft", legend=paste("sd=",SD5, sep=""))
plot(predict(aresult.glm), residuals(aresult.glm), xlim=c(0, 100), ylim=c(-50, 50),
xlab="推定成績", ylab="回帰残差", main="回帰残差図")
abline(h=c(-SD5, 0, SD5), lty=c(2,1,2))
abline(v=59, col=2)
text(predict(aresult.glm), residuals(aresult.glm)+2, cex=0.5)
residuals(aresult.glm, type="response") %>% sd() %>% round(2)->SD5
legend("topleft", legend=paste("sd=", SD5, sep=""))