Knižnice

# install.packages("ggplot2")
library(ggplot2)
# require(ggplot2)
# install.packages("GGally")
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Deskriptívne štatistiky a exploratórna analýza

Ex1014 <- read.csv("http://www.karlin.mff.cuni.cz/~pesta/NMFM334/cvicenie/Ex1014.txt", header = TRUE, sep = " ")
Ex1014
##    copper zinc protein
## 1     0.0    0     201
## 2     0.0  375     186
## 3     0.0  750     173
## 4     0.0 1125     110
## 5     0.0 1500     115
## 6    37.5    0     202
## 7    37.5  375     161
## 8    37.5  750     172
## 9    37.5 1125     138
## 10   37.5 1500     133
## 11   75.0    0     204
## 12   75.0  375     165
## 13   75.0  750     148
## 14   75.0 1125     143
## 15   75.0 1500     123
## 16  112.5    0     188
## 17  112.5  375     172
## 18  112.5  750     157
## 19  112.5 1125     115
## 20  112.5 1500     108
## 21  150.0    0     133
## 22  150.0  375     125
## 23  150.0  750     184
## 24  150.0 1125     135
## 25  150.0 1500     114
summary(Ex1014)
##      copper           zinc         protein     
##  Min.   :  0.0   Min.   :   0   Min.   :108.0  
##  1st Qu.: 37.5   1st Qu.: 375   1st Qu.:125.0  
##  Median : 75.0   Median : 750   Median :148.0  
##  Mean   : 75.0   Mean   : 750   Mean   :152.2  
##  3rd Qu.:112.5   3rd Qu.:1125   3rd Qu.:173.0  
##  Max.   :150.0   Max.   :1500   Max.   :204.0
dim(Ex1014)
## [1] 25  3
pairs(Ex1014)

ggpairs(Ex1014)

cor(Ex1014)
##             copper       zinc    protein
## copper   1.0000000  0.0000000 -0.2350643
## zinc     0.0000000  1.0000000 -0.7755271
## protein -0.2350643 -0.7755271  1.0000000
hgram<-ggplot(Ex1014, aes(x=protein)) + geom_histogram()
hgram
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

hgramc<-ggplot(Ex1014, aes(x=protein, fill=factor(copper), color=factor(copper))) + geom_histogram(position="identity", alpha=0.5)
hgramc
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

attach(Ex1014)

Modelovanie

Linárny model lineárnej regresie

\[ Y_i=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

summary(m.gener <- lm(protein ~ copper + zinc, data = Ex1014))
## 
## Call:
## lm(formula = protein ~ copper + zinc, data = Ex1014)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -42.56  -5.60   5.48   9.72  41.96 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 195.880000   8.536978  22.945  < 2e-16 ***
## copper       -0.135467   0.071990  -1.882   0.0732 .  
## zinc         -0.044693   0.007199  -6.208 3.01e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.09 on 22 degrees of freedom
## Multiple R-squared:  0.6567, Adjusted R-squared:  0.6255 
## F-statistic: 21.04 on 2 and 22 DF,  p-value: 7.806e-06

Linárny podmodel lineárnej regresie

boxplot(protein~zinc)

# install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# install.packages("hrbrthemes")
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# install.packages("viridis")
library(viridis)
## Loading required package: viridisLite
Ex1014 %>%
  ggplot( aes(x=factor(zinc), y=protein, fill=factor(zinc))) +
    geom_boxplot() +
    scale_fill_viridis(discrete = TRUE, alpha=0.6) +
    geom_jitter(color="black", size=0.4, alpha=0.9) +
    theme_ipsum() +
    theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
    ggtitle("A boxplot with jitter") +
    xlab("")

Ex1014 %>%
  ggplot( aes(x=protein, y=factor(zinc), fill=factor(zinc))) +
    geom_violin() +
    scale_fill_viridis(discrete = TRUE, alpha=0.6, option="A") +
    theme_ipsum() +
    theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
    ggtitle("Violin chart") +
    xlab("")

\[ Y_i=\beta_0+\beta_{Zn}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

summary(m.zn<-lm(protein~zinc))
## 
## Call:
## lm(formula = protein ~ zinc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -52.72  -4.68   2.56  15.28  31.80 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 185.720000   6.968421  26.652  < 2e-16 ***
## zinc         -0.044693   0.007586  -5.891 5.27e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.12 on 23 degrees of freedom
## Multiple R-squared:  0.6014, Adjusted R-squared:  0.5841 
## F-statistic: 34.71 on 1 and 23 DF,  p-value: 5.267e-06

Grafická reprezentácia

plot(protein~zinc)
abline(m.zn,col=2)

ggplot(Ex1014, aes(x = zinc, y = protein)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula 'y ~ x'

Analýzy rozptylu jednoduchého triedenia

anova(m.zn,m.gener)
## Analysis of Variance Table
## 
## Model 1: protein ~ zinc
## Model 2: protein ~ copper + zinc
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     23 9307.1                              
## 2     22 8016.8  1    1290.3 3.5409 0.07317 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m.gener)
## Analysis of Variance Table
## 
## Response: protein
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## copper     1  1290.3  1290.3  3.5409   0.07317 .  
## zinc       1 14044.9 14044.9 38.5425 3.006e-06 ***
## Residuals 22  8016.8   364.4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Faktorizovaný regresor

\[ Y_i=\beta_0+\beta_{Cu[37.5]}1_{X_{i,Cu}=37.5}+\beta_{Cu[75]}1_{X_{i,Cu}=75}+\beta_{Cu[112.5]}1_{X_{i,Cu}=112.5}+\beta_{Cu[150]}1_{X_{i,Cu}=150}+\beta_{Zn}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

Copper<-factor(copper)
summary(Copper)
##     0  37.5    75 112.5   150 
##     5     5     5     5     5
summary(m.factor<-lm(protein~Copper+zinc),cor=T)
## 
## Call:
## lm(formula = protein ~ Copper + zinc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -38.72  -8.48   5.32  10.48  45.80 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 190.520000  10.598256  17.977 2.19e-13 ***
## Copper37.5    4.200000  12.667339   0.332    0.744    
## Copper75     -0.400000  12.667339  -0.032    0.975    
## Copper112.5  -9.000000  12.667339  -0.710    0.486    
## Copper150   -18.800000  12.667339  -1.484    0.154    
## zinc         -0.044693   0.007553  -5.917 1.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.03 on 19 degrees of freedom
## Multiple R-squared:  0.6736, Adjusted R-squared:  0.5877 
## F-statistic: 7.842 on 5 and 19 DF,  p-value: 0.0003767
## 
## Correlation of Coefficients:
##             (Intercept) Copper37.5 Copper75 Copper112.5 Copper150
## Copper37.5  -0.60                                                
## Copper75    -0.60        0.50                                    
## Copper112.5 -0.60        0.50       0.50                         
## Copper150   -0.60        0.50       0.50     0.50                
## zinc        -0.53        0.00       0.00     0.00        0.00

\[ Y_i=\beta_0+\beta_{Zn[375]}1_{X_{i,Zn}=375}+\beta_{Zn[750]}1_{X_{i,Zn}=750}+\beta_{Zn[1125]}1_{X_{i,Zn}=1125}+\beta_{Zn[1500]}1_{X_{i,Zn}=1500}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

Zinc<-factor(zinc)
summary(lm(protein~Zinc))
## 
## Call:
## lm(formula = protein ~ Zinc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -52.6   -9.8    4.4   14.4   24.2 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  185.600      8.788  21.120 3.81e-15 ***
## Zinc375      -23.800     12.428  -1.915 0.069901 .  
## Zinc750      -18.800     12.428  -1.513 0.145996    
## Zinc1125     -57.400     12.428  -4.619 0.000166 ***
## Zinc1500     -67.000     12.428  -5.391 2.82e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.65 on 20 degrees of freedom
## Multiple R-squared:  0.6693, Adjusted R-squared:  0.6031 
## F-statistic: 10.12 on 4 and 20 DF,  p-value: 0.0001204
anova(lm(protein~Zinc))
## Analysis of Variance Table
## 
## Response: protein
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Zinc       4 15629.2  3907.3  10.119 0.0001204 ***
## Residuals 20  7722.8   386.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Lineárny model s interakciami

interaction.plot(zinc,copper,protein)

Rozklad typu I

Závisí na poradí členov.

anova(lm(protein~copper*zinc))
## Analysis of Variance Table
## 
## Response: protein
##             Df  Sum Sq Mean Sq F value   Pr(>F)    
## copper       1  1290.3  1290.3  4.1418  0.05465 .  
## zinc         1 14044.9 14044.9 45.0828 1.21e-06 ***
## copper:zinc  1  1474.6  1474.6  4.7332  0.04115 *  
## Residuals   21  6542.2   311.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rozklad typu II

Hodnotí prínos daného členu po adjustancii voči ostatným členom, ktoré ho neobsahujú. Teda z úplného modelu najprv vylúčime tento člen a všetky ďalšie, ktoré ho obsahujú.

# install.packages("car")
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
Anova(lm(protein~copper*zinc))
## Anova Table (Type II tests)
## 
## Response: protein
##              Sum Sq Df F value   Pr(>F)    
## copper       1290.3  1  4.1418  0.05465 .  
## zinc        14044.9  1 45.0828 1.21e-06 ***
## copper:zinc  1474.6  1  4.7332  0.04115 *  
## Residuals    6542.2 21                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rozklad typu III

Hodnotí prínos daného členu po adjustancii voči ostatným členomlenom bez ohľadu na jeho rád. Teda z úplného modelu najprv vylúčime tento člen (a je prevedená adjustancia) a všetky ostatné zostanú (vrátene prípadných inetrakcí, v ktorých sa vyskytuje)

Anova(lm(protein~copper*zinc),type="III")
## Anova Table (Type III tests)
## 
## Response: protein
##             Sum Sq Df  F value    Pr(>F)    
## (Intercept) 123951  1 397.8713 3.956e-15 ***
## copper        2714  1   8.7105  0.007622 ** 
## zinc          9955  1  31.9554 1.306e-05 ***
## copper:zinc   1475  1   4.7332  0.041151 *  
## Residuals     6542 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interakcia len pre vyššie hodnoty

summary(m.cu.zn1500<-lm(protein~copper+copper:(Zinc==1500)))
## 
## Call:
## lm(formula = protein ~ copper + copper:(Zinc == 1500))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.360 -17.447   1.118  17.075  46.597 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             162.36000    9.90881  16.385  8.2e-14 ***
## copper                   -0.06609    0.11228  -0.589   0.5621    
## copper:Zinc == 1500TRUE  -0.34689    0.15570  -2.228   0.0364 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28.6 on 22 degrees of freedom
## Multiple R-squared:  0.2292, Adjusted R-squared:  0.1591 
## F-statistic:  3.27 on 2 and 22 DF,  p-value: 0.05709

Hierarchically Well-Formulated

\[ Y_i=\beta_0+\beta_{Cu,1}X_{i,Cu}+\beta_{Cu,2}X_{i,Cu}^2+\beta_{Zn,1}X_{i,Zn}+\beta_{Zn,2}X_{i,Zn}^2+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

summary(m.inter22<-lm(protein~copper+zinc+I(copper^2)+I(zinc^2)+I(zinc*copper)))
## 
## Call:
## lm(formula = protein ~ copper + zinc + I(copper^2) + I(zinc^2) + 
##     I(zinc * copper))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.434 -10.943   1.920   8.589  44.360 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.045e+02  1.233e+01  16.580 9.34e-13 ***
## copper           -9.646e-02  2.565e-01  -0.376   0.7111    
## zinc             -5.359e-02  2.565e-02  -2.089   0.0504 .  
## I(copper^2)      -1.625e-03  1.522e-03  -1.068   0.2990    
## I(zinc^2)        -7.721e-06  1.522e-05  -0.507   0.6178    
## I(zinc * copper)  2.731e-04  1.274e-04   2.144   0.0452 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.91 on 19 degrees of freedom
## Multiple R-squared:  0.739,  Adjusted R-squared:  0.6704 
## F-statistic: 10.76 on 5 and 19 DF,  p-value: 5.074e-05
  • alternatívne
summary(m.inter22<-lm(protein~copper*zinc+I(copper^2)+I(zinc^2)))
## 
## Call:
## lm(formula = protein ~ copper * zinc + I(copper^2) + I(zinc^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.434 -10.943   1.920   8.589  44.360 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.045e+02  1.233e+01  16.580 9.34e-13 ***
## copper      -9.646e-02  2.565e-01  -0.376   0.7111    
## zinc        -5.359e-02  2.565e-02  -2.089   0.0504 .  
## I(copper^2) -1.625e-03  1.522e-03  -1.068   0.2990    
## I(zinc^2)   -7.721e-06  1.522e-05  -0.507   0.6178    
## copper:zinc  2.731e-04  1.274e-04   2.144   0.0452 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.91 on 19 degrees of freedom
## Multiple R-squared:  0.739,  Adjusted R-squared:  0.6704 
## F-statistic: 10.76 on 5 and 19 DF,  p-value: 5.074e-05

\[ Y_i=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn,1}X_{i,Zn}+\beta_{Zn,2}X_{i,Zn}^2+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

summary(m.inter12<-lm(protein~copper+zinc+I(zinc^2)+I(zinc*copper)))
## 
## Call:
## lm(formula = protein ~ copper + zinc + I(zinc^2) + I(zinc * copper))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.006  -8.069   0.954   8.469  39.789 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.091e+02  1.161e+01  18.012 7.89e-14 ***
## copper           -3.403e-01  1.174e-01  -2.899  0.00888 ** 
## zinc             -5.359e-02  2.574e-02  -2.082  0.05043 .  
## I(zinc^2)        -7.721e-06  1.527e-05  -0.505  0.61877    
## I(zinc * copper)  2.731e-04  1.278e-04   2.137  0.04517 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.97 on 20 degrees of freedom
## Multiple R-squared:  0.7234, Adjusted R-squared:  0.6681 
## F-statistic: 13.08 on 4 and 20 DF,  p-value: 2.16e-05

\[ Y_i=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn}X_{i,Zn}+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

summary(m.inter11<-lm(protein~copper+zinc+I(zinc*copper)))
## 
## Call:
## lm(formula = protein ~ copper + zinc + I(zinc * copper))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -27.92 -10.24   1.52  10.64  41.96 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.112e+02  1.059e+01  19.947 3.96e-15 ***
## copper           -3.403e-01  1.153e-01  -2.951  0.00762 ** 
## zinc             -6.517e-02  1.153e-02  -5.653 1.31e-05 ***
## I(zinc * copper)  2.731e-04  1.255e-04   2.176  0.04115 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.65 on 21 degrees of freedom
## Multiple R-squared:  0.7198, Adjusted R-squared:  0.6798 
## F-statistic: 17.99 on 3 and 21 DF,  p-value: 5.148e-06

Testovanie podmodelov

anova(m.inter11,m.inter12,m.inter22)
## Analysis of Variance Table
## 
## Model 1: protein ~ copper + zinc + I(zinc * copper)
## Model 2: protein ~ copper + zinc + I(zinc^2) + I(zinc * copper)
## Model 3: protein ~ copper * zinc + I(copper^2) + I(zinc^2)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     21 6542.2                           
## 2     20 6459.7  1     82.51 0.2573 0.6178
## 3     19 6094.0  1    365.71 1.1402 0.2990

Model s logaritmom závislej premennej

protein.log<-log(protein)
summary(m.log<-lm(protein.log~copper+zinc+I(zinc*copper)))
## 
## Call:
## lm(formula = protein.log ~ copper + zinc + I(zinc * copper))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.211488 -0.078442  0.004381  0.084730  0.273805 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.382e+00  7.272e-02  74.007  < 2e-16 ***
## copper           -2.076e-03  7.917e-04  -2.623   0.0159 *  
## zinc             -4.176e-04  7.917e-05  -5.275 3.14e-05 ***
## I(zinc * copper)  1.636e-06  8.619e-07   1.898   0.0716 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1212 on 21 degrees of freedom
## Multiple R-squared:  0.6986, Adjusted R-squared:  0.6556 
## F-statistic: 16.23 on 3 and 21 DF,  p-value: 1.093e-05

Model s Box-Coxovou transformácciou

  • Box-Cox

\[ Y^{(\lambda)}=\begin{cases} \frac{Y^{\lambda}-1}{\lambda},\quad \lambda\neq 0;\\ \log\lambda,\quad \lambda = 0. \end{cases} \]

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
boxcox(lm(protein~copper+zinc+I(copper*zinc)),seq(-1.5,3.5,1/50))
title(main="Funkcia boxcox pre model s interakciami")

bc<-boxcox(lm(protein~copper+zinc+I(copper*zinc)),seq(1,1.1,1/100))

logmax<-max(bc$y,na.rm=F)
logmax
## [1] 15.29793
i<-1
while(bc$y[i]!=logmax){lambda.optimal.L<-bc$x[i];i<-i+1}
lambda.optimal.L
## [1] 1.062626
i<-100
while(bc$y[i]!=logmax){lambda.optimal.U<-bc$x[i];i<-i-1}
lambda.optimal.U
## [1] 1.064646
(lambda.hat<-(lambda.optimal.L+lambda.optimal.U)/2)
## [1] 1.063636

\[ \frac{Y_i^{\lambda}-1}{\lambda}=\beta_0+\beta_{Cu}X_{i,Cu}+\beta_{Zn}X_{i,Zn}+\beta_{CuZn}X_{i,Cu}X_{i,Zn}+\varepsilon_i,\quad i=1,\ldots,n(=25) \]

protein.lambda<-(protein^lambda.hat - 1)/lambda.hat
summary(m.lambda<-lm(protein.lambda~copper+zinc+I(zinc*copper)))
## 
## Call:
## lm(formula = protein.lambda ~ copper + zinc + I(zinc * copper))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.116 -13.948   2.365  14.452  57.826 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.778e+02  1.456e+01  19.081 9.58e-15 ***
## copper           -4.709e-01  1.585e-01  -2.971  0.00729 ** 
## zinc             -8.994e-02  1.585e-02  -5.674 1.24e-05 ***
## I(zinc * copper)  3.783e-04  1.726e-04   2.192  0.03977 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.27 on 21 degrees of freedom
## Multiple R-squared:  0.7209, Adjusted R-squared:  0.681 
## F-statistic: 18.08 on 3 and 21 DF,  p-value: 4.948e-06

Diagnostika

Graficky

plot(m.inter11, which=1:6)

Tvar závislosti

plot(m.inter11$resid~m.inter11$fitted)
abline(h=0,col="blue")

Rozptyl

plot((m.inter11$resid)^2~m.inter11$fitted)

Normalita

qqnorm(m.inter11$resid)
qqline(m.inter11$resid,col="red")

Testy

Odľahlé pozorovania

summary(m.inter.infl<-influence.measures(m.inter11))
## Potentially influential observations of
##   lm(formula = protein ~ copper + zinc + I(zinc * copper)) :
## 
##    dfb.1_ dfb.cppr dfb.zinc dfb.I(*c dffit   cov.r   cook.d hat  
## 1  -0.54   0.44     0.44    -0.36    -0.54    1.72_*  0.07   0.36
## 5  -0.03   0.02     0.06    -0.05     0.08    1.90_*  0.00   0.36
## 21  0.52  -1.27_*  -0.42     1.04_*  -1.55_*  0.87    0.52   0.36
## 23 -0.21   0.52     0.00     0.00     1.10    0.32_*  0.22   0.12
## 25 -0.06   0.14     0.14    -0.35    -0.52    1.73_*  0.07   0.36
2*(1-pt(abs(rstudent(m.inter11)[21]),25-3-1))
##         21 
## 0.05078931
2*(1-pt(max(abs(rstudent(m.inter11))),25-3-1))*25
## [1] 0.1833379

Multikolinearita

VIF <- function(lmobj)
# pocita diagnosticke statistiky souvisejici s multikolinearitou
# zalozene na korelacni matici
# predpoklada absolutni clen
{
if (!is.null(weights(lmobj)))
    stop("requires unweighted model")
if (!(any(names(coefficients(lmobj))=="(Intercept)")))
stop("requires model with intercept")
X0 <- scale(model.matrix(lmobj))[,-1] # standardizace regresoru
nam <- labels(terms(lmobj))[-1]
# nam <- term.names(lmobj)[-1]
y0 <- scale(lmobj$model[,1]) # standardizace regresandu
lmobj0 <- lm(y0~X0) # standardizovana regrese
VIF <- diag(solve(cor(X0)))
tol <- 1/VIF; R2 <- 1-tol
b.star <- coef(lmobj0)[-1]
out <- cbind(b.star,VIF,R2,tol)
# rownames(out) <- term.names(lmobj)[-1]
return(out)
}
VIF(m.inter11)
##                        b.star VIF        R2       tol
## X0copper           -0.5904371   3 0.6666667 0.3333333
## X0zinc             -1.1308999   3 0.6666667 0.3333333
## X0I(zinc * copper)  0.5618937   5 0.8000000 0.2000000

Homoskedasticita

# install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
  • Goldfeld-Quandtov test
gqtest(m.inter11)
## 
##  Goldfeld-Quandt test
## 
## data:  m.inter11
## GQ = 1.9537, df1 = 9, df2 = 8, p-value = 0.1792
## alternative hypothesis: variance increases from segment 1 to 2
  • Breusch-Paganov test
bptest(m.inter11)
## 
##  studentized Breusch-Pagan test
## 
## data:  m.inter11
## BP = 5.3827, df = 3, p-value = 0.1458

Normalita

  • Shapiro-Wilkov test
shapiro.test(resid(m.inter11))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(m.inter11)
## W = 0.9638, p-value = 0.4953
  • D’Agostinov test
# install.packages("moments")
library(moments)
agostino.test(resid(m.inter11))
## 
##  D'Agostino skewness test
## 
## data:  resid(m.inter11)
## skew = 0.18669, z = 0.45394, p-value = 0.6499
## alternative hypothesis: data have a skewness
  • Jarque–Bera test
jarque.test(resid(m.inter11))
## 
##  Jarque-Bera Normality Test
## 
## data:  resid(m.inter11)
## JB = 0.1583, p-value = 0.9239
## alternative hypothesis: greater
  • Lillieforsov (Kolmogorov-Smirnov) test
# install.packages("nortest")
library(nortest)
lillie.test(resid(m.inter11))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  resid(m.inter11)
## D = 0.094108, p-value = 0.8219

Nezávislosť

  • Durbin-Watsonov test – nejde o časový rad (nezmyslené)
dwtest(m.inter11)
## 
##  Durbin-Watson test
## 
## data:  m.inter11
## DW = 1.9314, p-value = 0.2912
## alternative hypothesis: true autocorrelation is greater than 0

Záver

final m.inter11

# install.packages("sjPlot")
# install.packages("sjmisc")
library(sjPlot)
## Registered S3 method overwritten by 'parameters':
##   method                         from      
##   format.parameters_distribution datawizard
library(sjmisc)
## 
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
## 
##     is_empty
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## The following object is masked from 'package:tibble':
## 
##     add_case
plot_model(m.inter11, type = "pred", terms = c("copper", "zinc"))

plot_model(m.inter11, type = "pred", terms = c("zinc", "copper"))