# install.packages("ggplot2")
library(ggplot2)
# require(ggplot2)
# install.packages("GGally")
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
Ex1014 <- read.csv("http://www.karlin.mff.cuni.cz/~pesta/NMFM334/cvicenie/Ex1014.txt", header = TRUE, sep = " ")
Ex1014
## copper zinc protein
## 1 0.0 0 201
## 2 0.0 375 186
## 3 0.0 750 173
## 4 0.0 1125 110
## 5 0.0 1500 115
## 6 37.5 0 202
## 7 37.5 375 161
## 8 37.5 750 172
## 9 37.5 1125 138
## 10 37.5 1500 133
## 11 75.0 0 204
## 12 75.0 375 165
## 13 75.0 750 148
## 14 75.0 1125 143
## 15 75.0 1500 123
## 16 112.5 0 188
## 17 112.5 375 172
## 18 112.5 750 157
## 19 112.5 1125 115
## 20 112.5 1500 108
## 21 150.0 0 133
## 22 150.0 375 125
## 23 150.0 750 184
## 24 150.0 1125 135
## 25 150.0 1500 114
summary(Ex1014)
## copper zinc protein
## Min. : 0.0 Min. : 0 Min. :108.0
## 1st Qu.: 37.5 1st Qu.: 375 1st Qu.:125.0
## Median : 75.0 Median : 750 Median :148.0
## Mean : 75.0 Mean : 750 Mean :152.2
## 3rd Qu.:112.5 3rd Qu.:1125 3rd Qu.:173.0
## Max. :150.0 Max. :1500 Max. :204.0
dim(Ex1014)
## [1] 25 3
pairs(Ex1014)
ggpairs(Ex1014)
cor(Ex1014)
## copper zinc protein
## copper 1.0000000 0.0000000 -0.2350643
## zinc 0.0000000 1.0000000 -0.7755271
## protein -0.2350643 -0.7755271 1.0000000
hgram<-ggplot(Ex1014, aes(x=protein)) + geom_histogram()
hgram
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
hgramc<-ggplot(Ex1014, aes(x=protein, fill=factor(copper), color=factor(copper))) + geom_histogram(position="identity", alpha=0.5)
hgramc
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
attach(Ex1014)
\[ Y_i=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
summary(m.gener <- lm(protein ~ copper + zinc, data = Ex1014))
##
## Call:
## lm(formula = protein ~ copper + zinc, data = Ex1014)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.56 -5.60 5.48 9.72 41.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 195.880000 8.536978 22.945 < 2e-16 ***
## copper -0.135467 0.071990 -1.882 0.0732 .
## zinc -0.044693 0.007199 -6.208 3.01e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.09 on 22 degrees of freedom
## Multiple R-squared: 0.6567, Adjusted R-squared: 0.6255
## F-statistic: 21.04 on 2 and 22 DF, p-value: 7.806e-06
boxplot(protein~zinc)
# install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# install.packages("hrbrthemes")
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# install.packages("viridis")
library(viridis)
## Loading required package: viridisLite
Ex1014 %>%
ggplot( aes(x=factor(zinc), y=protein, fill=factor(zinc))) +
geom_boxplot() +
scale_fill_viridis(discrete = TRUE, alpha=0.6) +
geom_jitter(color="black", size=0.4, alpha=0.9) +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("A boxplot with jitter") +
xlab("")
Ex1014 %>%
ggplot( aes(x=protein, y=factor(zinc), fill=factor(zinc))) +
geom_violin() +
scale_fill_viridis(discrete = TRUE, alpha=0.6, option="A") +
theme_ipsum() +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
ggtitle("Violin chart") +
xlab("")
\[ Y_i=\beta_0+\beta_{Zn}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
summary(m.zn<-lm(protein~zinc))
##
## Call:
## lm(formula = protein ~ zinc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.72 -4.68 2.56 15.28 31.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 185.720000 6.968421 26.652 < 2e-16 ***
## zinc -0.044693 0.007586 -5.891 5.27e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.12 on 23 degrees of freedom
## Multiple R-squared: 0.6014, Adjusted R-squared: 0.5841
## F-statistic: 34.71 on 1 and 23 DF, p-value: 5.267e-06
plot(protein~zinc)
abline(m.zn,col=2)
ggplot(Ex1014, aes(x = zinc, y = protein)) +
geom_point() +
stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula 'y ~ x'
anova(m.zn,m.gener)
## Analysis of Variance Table
##
## Model 1: protein ~ zinc
## Model 2: protein ~ copper + zinc
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 23 9307.1
## 2 22 8016.8 1 1290.3 3.5409 0.07317 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m.gener)
## Analysis of Variance Table
##
## Response: protein
## Df Sum Sq Mean Sq F value Pr(>F)
## copper 1 1290.3 1290.3 3.5409 0.07317 .
## zinc 1 14044.9 14044.9 38.5425 3.006e-06 ***
## Residuals 22 8016.8 364.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[ Y_i=\beta_0+\beta_{Cu[37.5]}1_{X_{i,Cu}=37.5}+\beta_{Cu[75]}1_{X_{i,Cu}=75}+\beta_{Cu[112.5]}1_{X_{i,Cu}=112.5}+\beta_{Cu[150]}1_{X_{i,Cu}=150}+\beta_{Zn}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
Copper<-factor(copper)
summary(Copper)
## 0 37.5 75 112.5 150
## 5 5 5 5 5
summary(m.factor<-lm(protein~Copper+zinc),cor=T)
##
## Call:
## lm(formula = protein ~ Copper + zinc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.72 -8.48 5.32 10.48 45.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 190.520000 10.598256 17.977 2.19e-13 ***
## Copper37.5 4.200000 12.667339 0.332 0.744
## Copper75 -0.400000 12.667339 -0.032 0.975
## Copper112.5 -9.000000 12.667339 -0.710 0.486
## Copper150 -18.800000 12.667339 -1.484 0.154
## zinc -0.044693 0.007553 -5.917 1.07e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.03 on 19 degrees of freedom
## Multiple R-squared: 0.6736, Adjusted R-squared: 0.5877
## F-statistic: 7.842 on 5 and 19 DF, p-value: 0.0003767
##
## Correlation of Coefficients:
## (Intercept) Copper37.5 Copper75 Copper112.5 Copper150
## Copper37.5 -0.60
## Copper75 -0.60 0.50
## Copper112.5 -0.60 0.50 0.50
## Copper150 -0.60 0.50 0.50 0.50
## zinc -0.53 0.00 0.00 0.00 0.00
\[ Y_i=\beta_0+\beta_{Zn[375]}1_{X_{i,Zn}=375}+\beta_{Zn[750]}1_{X_{i,Zn}=750}+\beta_{Zn[1125]}1_{X_{i,Zn}=1125}+\beta_{Zn[1500]}1_{X_{i,Zn}=1500}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
Zinc<-factor(zinc)
summary(lm(protein~Zinc))
##
## Call:
## lm(formula = protein ~ Zinc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.6 -9.8 4.4 14.4 24.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 185.600 8.788 21.120 3.81e-15 ***
## Zinc375 -23.800 12.428 -1.915 0.069901 .
## Zinc750 -18.800 12.428 -1.513 0.145996
## Zinc1125 -57.400 12.428 -4.619 0.000166 ***
## Zinc1500 -67.000 12.428 -5.391 2.82e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.65 on 20 degrees of freedom
## Multiple R-squared: 0.6693, Adjusted R-squared: 0.6031
## F-statistic: 10.12 on 4 and 20 DF, p-value: 0.0001204
anova(lm(protein~Zinc))
## Analysis of Variance Table
##
## Response: protein
## Df Sum Sq Mean Sq F value Pr(>F)
## Zinc 4 15629.2 3907.3 10.119 0.0001204 ***
## Residuals 20 7722.8 386.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
interaction.plot(zinc,copper,protein)
Závisí na poradí členov.
anova(lm(protein~copper*zinc))
## Analysis of Variance Table
##
## Response: protein
## Df Sum Sq Mean Sq F value Pr(>F)
## copper 1 1290.3 1290.3 4.1418 0.05465 .
## zinc 1 14044.9 14044.9 45.0828 1.21e-06 ***
## copper:zinc 1 1474.6 1474.6 4.7332 0.04115 *
## Residuals 21 6542.2 311.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hodnotí prínos daného členu po adjustancii voči ostatným členom, ktoré ho neobsahujú. Teda z úplného modelu najprv vylúčime tento člen a všetky ďalšie, ktoré ho obsahujú.
# install.packages("car")
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
Anova(lm(protein~copper*zinc))
## Anova Table (Type II tests)
##
## Response: protein
## Sum Sq Df F value Pr(>F)
## copper 1290.3 1 4.1418 0.05465 .
## zinc 14044.9 1 45.0828 1.21e-06 ***
## copper:zinc 1474.6 1 4.7332 0.04115 *
## Residuals 6542.2 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hodnotí prínos daného členu po adjustancii voči ostatným členomlenom bez ohľadu na jeho rád. Teda z úplného modelu najprv vylúčime tento člen (a je prevedená adjustancia) a všetky ostatné zostanú (vrátene prípadných inetrakcí, v ktorých sa vyskytuje)
Anova(lm(protein~copper*zinc),type="III")
## Anova Table (Type III tests)
##
## Response: protein
## Sum Sq Df F value Pr(>F)
## (Intercept) 123951 1 397.8713 3.956e-15 ***
## copper 2714 1 8.7105 0.007622 **
## zinc 9955 1 31.9554 1.306e-05 ***
## copper:zinc 1475 1 4.7332 0.041151 *
## Residuals 6542 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m.cu.zn1500<-lm(protein~copper+copper:(Zinc==1500)))
##
## Call:
## lm(formula = protein ~ copper + copper:(Zinc == 1500))
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.360 -17.447 1.118 17.075 46.597
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 162.36000 9.90881 16.385 8.2e-14 ***
## copper -0.06609 0.11228 -0.589 0.5621
## copper:Zinc == 1500TRUE -0.34689 0.15570 -2.228 0.0364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.6 on 22 degrees of freedom
## Multiple R-squared: 0.2292, Adjusted R-squared: 0.1591
## F-statistic: 3.27 on 2 and 22 DF, p-value: 0.05709
\[ Y_i=\beta_0+\beta_{Cu,1}X_{i,Cu}+\beta_{Cu,2}X_{i,Cu}^2+\beta_{Zn,1}X_{i,Zn}+\beta_{Zn,2}X_{i,Zn}^2+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
summary(m.inter22<-lm(protein~copper+zinc+I(copper^2)+I(zinc^2)+I(zinc*copper)))
##
## Call:
## lm(formula = protein ~ copper + zinc + I(copper^2) + I(zinc^2) +
## I(zinc * copper))
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.434 -10.943 1.920 8.589 44.360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.045e+02 1.233e+01 16.580 9.34e-13 ***
## copper -9.646e-02 2.565e-01 -0.376 0.7111
## zinc -5.359e-02 2.565e-02 -2.089 0.0504 .
## I(copper^2) -1.625e-03 1.522e-03 -1.068 0.2990
## I(zinc^2) -7.721e-06 1.522e-05 -0.507 0.6178
## I(zinc * copper) 2.731e-04 1.274e-04 2.144 0.0452 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.91 on 19 degrees of freedom
## Multiple R-squared: 0.739, Adjusted R-squared: 0.6704
## F-statistic: 10.76 on 5 and 19 DF, p-value: 5.074e-05
summary(m.inter22<-lm(protein~copper*zinc+I(copper^2)+I(zinc^2)))
##
## Call:
## lm(formula = protein ~ copper * zinc + I(copper^2) + I(zinc^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.434 -10.943 1.920 8.589 44.360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.045e+02 1.233e+01 16.580 9.34e-13 ***
## copper -9.646e-02 2.565e-01 -0.376 0.7111
## zinc -5.359e-02 2.565e-02 -2.089 0.0504 .
## I(copper^2) -1.625e-03 1.522e-03 -1.068 0.2990
## I(zinc^2) -7.721e-06 1.522e-05 -0.507 0.6178
## copper:zinc 2.731e-04 1.274e-04 2.144 0.0452 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.91 on 19 degrees of freedom
## Multiple R-squared: 0.739, Adjusted R-squared: 0.6704
## F-statistic: 10.76 on 5 and 19 DF, p-value: 5.074e-05
\[ Y_i=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn,1}X_{i,Zn}+\beta_{Zn,2}X_{i,Zn}^2+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
summary(m.inter12<-lm(protein~copper+zinc+I(zinc^2)+I(zinc*copper)))
##
## Call:
## lm(formula = protein ~ copper + zinc + I(zinc^2) + I(zinc * copper))
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.006 -8.069 0.954 8.469 39.789
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.091e+02 1.161e+01 18.012 7.89e-14 ***
## copper -3.403e-01 1.174e-01 -2.899 0.00888 **
## zinc -5.359e-02 2.574e-02 -2.082 0.05043 .
## I(zinc^2) -7.721e-06 1.527e-05 -0.505 0.61877
## I(zinc * copper) 2.731e-04 1.278e-04 2.137 0.04517 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.97 on 20 degrees of freedom
## Multiple R-squared: 0.7234, Adjusted R-squared: 0.6681
## F-statistic: 13.08 on 4 and 20 DF, p-value: 2.16e-05
\[ Y_i=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn}X_{i,Zn}+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
summary(m.inter11<-lm(protein~copper+zinc+I(zinc*copper)))
##
## Call:
## lm(formula = protein ~ copper + zinc + I(zinc * copper))
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.92 -10.24 1.52 10.64 41.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.112e+02 1.059e+01 19.947 3.96e-15 ***
## copper -3.403e-01 1.153e-01 -2.951 0.00762 **
## zinc -6.517e-02 1.153e-02 -5.653 1.31e-05 ***
## I(zinc * copper) 2.731e-04 1.255e-04 2.176 0.04115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.65 on 21 degrees of freedom
## Multiple R-squared: 0.7198, Adjusted R-squared: 0.6798
## F-statistic: 17.99 on 3 and 21 DF, p-value: 5.148e-06
anova(m.inter11,m.inter12,m.inter22)
## Analysis of Variance Table
##
## Model 1: protein ~ copper + zinc + I(zinc * copper)
## Model 2: protein ~ copper + zinc + I(zinc^2) + I(zinc * copper)
## Model 3: protein ~ copper * zinc + I(copper^2) + I(zinc^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 21 6542.2
## 2 20 6459.7 1 82.51 0.2573 0.6178
## 3 19 6094.0 1 365.71 1.1402 0.2990
protein.log<-log(protein)
summary(m.log<-lm(protein.log~copper+zinc+I(zinc*copper)))
##
## Call:
## lm(formula = protein.log ~ copper + zinc + I(zinc * copper))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.211488 -0.078442 0.004381 0.084730 0.273805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.382e+00 7.272e-02 74.007 < 2e-16 ***
## copper -2.076e-03 7.917e-04 -2.623 0.0159 *
## zinc -4.176e-04 7.917e-05 -5.275 3.14e-05 ***
## I(zinc * copper) 1.636e-06 8.619e-07 1.898 0.0716 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1212 on 21 degrees of freedom
## Multiple R-squared: 0.6986, Adjusted R-squared: 0.6556
## F-statistic: 16.23 on 3 and 21 DF, p-value: 1.093e-05
\[ Y^{(\lambda)}=\begin{cases} \frac{Y^{\lambda}-1}{\lambda},\quad \lambda\neq 0;\\ \log\lambda,\quad \lambda = 0. \end{cases} \]
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
boxcox(lm(protein~copper+zinc+I(copper*zinc)),seq(-1.5,3.5,1/50))
title(main="Funkcia boxcox pre model s interakciami")
bc<-boxcox(lm(protein~copper+zinc+I(copper*zinc)),seq(1,1.1,1/100))
logmax<-max(bc$y,na.rm=F)
logmax
## [1] 15.29793
i<-1
while(bc$y[i]!=logmax){lambda.optimal.L<-bc$x[i];i<-i+1}
lambda.optimal.L
## [1] 1.062626
i<-100
while(bc$y[i]!=logmax){lambda.optimal.U<-bc$x[i];i<-i-1}
lambda.optimal.U
## [1] 1.064646
(lambda.hat<-(lambda.optimal.L+lambda.optimal.U)/2)
## [1] 1.063636
\[ \frac{Y_i^{\lambda}-1}{\lambda}=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn}X_{i,Zn}+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]
protein.lambda<-(protein^lambda.hat - 1)/lambda.hat
summary(m.lambda<-lm(protein.lambda~copper+zinc+I(zinc*copper)))
##
## Call:
## lm(formula = protein.lambda ~ copper + zinc + I(zinc * copper))
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.116 -13.948 2.365 14.452 57.826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.778e+02 1.456e+01 19.081 9.58e-15 ***
## copper -4.709e-01 1.585e-01 -2.971 0.00729 **
## zinc -8.994e-02 1.585e-02 -5.674 1.24e-05 ***
## I(zinc * copper) 3.783e-04 1.726e-04 2.192 0.03977 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.27 on 21 degrees of freedom
## Multiple R-squared: 0.7209, Adjusted R-squared: 0.681
## F-statistic: 18.08 on 3 and 21 DF, p-value: 4.948e-06
plot(m.inter11, which=1:6)
plot(m.inter11$resid~m.inter11$fitted)
abline(h=0,col="blue")
plot((m.inter11$resid)^2~m.inter11$fitted)
qqnorm(m.inter11$resid)
qqline(m.inter11$resid,col="red")
summary(m.inter.infl<-influence.measures(m.inter11))
## Potentially influential observations of
## lm(formula = protein ~ copper + zinc + I(zinc * copper)) :
##
## dfb.1_ dfb.cppr dfb.zinc dfb.I(*c dffit cov.r cook.d hat
## 1 -0.54 0.44 0.44 -0.36 -0.54 1.72_* 0.07 0.36
## 5 -0.03 0.02 0.06 -0.05 0.08 1.90_* 0.00 0.36
## 21 0.52 -1.27_* -0.42 1.04_* -1.55_* 0.87 0.52 0.36
## 23 -0.21 0.52 0.00 0.00 1.10 0.32_* 0.22 0.12
## 25 -0.06 0.14 0.14 -0.35 -0.52 1.73_* 0.07 0.36
2*(1-pt(abs(rstudent(m.inter11)[21]),25-3-1))
## 21
## 0.05078931
2*(1-pt(max(abs(rstudent(m.inter11))),25-3-1))*25
## [1] 0.1833379
VIF <- function(lmobj)
# pocita diagnosticke statistiky souvisejici s multikolinearitou
# zalozene na korelacni matici
# predpoklada absolutni clen
{
if (!is.null(weights(lmobj)))
stop("requires unweighted model")
if (!(any(names(coefficients(lmobj))=="(Intercept)")))
stop("requires model with intercept")
X0 <- scale(model.matrix(lmobj))[,-1] # standardizace regresoru
nam <- labels(terms(lmobj))[-1]
# nam <- term.names(lmobj)[-1]
y0 <- scale(lmobj$model[,1]) # standardizace regresandu
lmobj0 <- lm(y0~X0) # standardizovana regrese
VIF <- diag(solve(cor(X0)))
tol <- 1/VIF; R2 <- 1-tol
b.star <- coef(lmobj0)[-1]
out <- cbind(b.star,VIF,R2,tol)
# rownames(out) <- term.names(lmobj)[-1]
return(out)
}
VIF(m.inter11)
## b.star VIF R2 tol
## X0copper -0.5904371 3 0.6666667 0.3333333
## X0zinc -1.1308999 3 0.6666667 0.3333333
## X0I(zinc * copper) 0.5618937 5 0.8000000 0.2000000
# install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
gqtest(m.inter11)
##
## Goldfeld-Quandt test
##
## data: m.inter11
## GQ = 1.9537, df1 = 9, df2 = 8, p-value = 0.1792
## alternative hypothesis: variance increases from segment 1 to 2
bptest(m.inter11)
##
## studentized Breusch-Pagan test
##
## data: m.inter11
## BP = 5.3827, df = 3, p-value = 0.1458
shapiro.test(resid(m.inter11))
##
## Shapiro-Wilk normality test
##
## data: resid(m.inter11)
## W = 0.9638, p-value = 0.4953
# install.packages("moments")
library(moments)
agostino.test(resid(m.inter11))
##
## D'Agostino skewness test
##
## data: resid(m.inter11)
## skew = 0.18669, z = 0.45394, p-value = 0.6499
## alternative hypothesis: data have a skewness
jarque.test(resid(m.inter11))
##
## Jarque-Bera Normality Test
##
## data: resid(m.inter11)
## JB = 0.1583, p-value = 0.9239
## alternative hypothesis: greater
# install.packages("nortest")
library(nortest)
lillie.test(resid(m.inter11))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: resid(m.inter11)
## D = 0.094108, p-value = 0.8219
dwtest(m.inter11)
##
## Durbin-Watson test
##
## data: m.inter11
## DW = 1.9314, p-value = 0.2912
## alternative hypothesis: true autocorrelation is greater than 0
final m.inter11
# install.packages("sjPlot")
# install.packages("sjmisc")
library(sjPlot)
## Registered S3 method overwritten by 'parameters':
## method from
## format.parameters_distribution datawizard
library(sjmisc)
##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
plot_model(m.inter11, type = "pred", terms = c("copper", "zinc"))
plot_model(m.inter11, type = "pred", terms = c("zinc", "copper"))