6.3 Regresión y correlación
6.3.1 Regresión lineal simple
Utilizando la función lm (modelo lineal) se puede llevar a cabo, entre otras muchas cosas, una regresión lineal simple:
# Fit the simple linear regression of satisfac on fidelida and print the
# fitted coefficients.
lm(formula = satisfac ~ fidelida, data = hatco)
##
## Call:
## lm(formula = satisfac ~ fidelida, data = hatco)
##
## Coefficients:
## (Intercept) fidelida
## 1.6074 0.0685
# Store the fit so it can be reused below. With na.action = na.exclude the
# incomplete row is left out of the fit, but fitted values and residuals are
# padded with NA so they stay aligned with the rows of hatco.
modelo <- lm(satisfac ~ fidelida, data = hatco, na.action = na.exclude)
# Coefficient table, residual summary, R-squared and overall F test.
summary(modelo)
##
## Call:
## lm(formula = satisfac ~ fidelida, data = hatco, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47492 -0.37341 0.09358 0.38258 1.25258
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.607399 0.322436 4.985 2.71e-06 ***
## fidelida 0.068500 0.006848 10.003 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6058 on 97 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5078, Adjusted R-squared: 0.5027
## F-statistic: 100.1 on 1 and 97 DF, p-value: < 2.2e-16
# Scatter plot of the data. Mind the argument order: x (predictor) first,
# then y (response).
plot(x = hatco$fidelida, y = hatco$satisfac)
# Alternative using with():
# with(hatco, plot(fidelida, satisfac))
# Alternative using a formula:
# plot(satisfac ~ fidelida, data = hatco)
# Overlay the fitted regression line.
abline(reg = modelo)
Valores ajustados
# Fitted values, one per row of hatco (NA where the row was excluded).
fitted(object = modelo)
## 1 2 3 4 5 6 7 8
## 3.799412 4.552917 4.895419 3.799412 5.580423 4.689918 4.758418 4.621417
## 9 10 11 12 13 14 15 16
## 5.922925 5.306421 3.799412 4.826919 4.278915 4.210415 5.306421 4.963919
## 17 18 19 20 21 22 23 24
## 4.210415 4.347416 5.306421 5.374922 4.415916 4.004913 5.374922 4.073414
## 25 26 27 28 29 30 31 32
## 4.963919 4.963919 4.073414 5.306421 4.963919 4.758418 4.552917 5.237921
## 33 34 35 36 37 38 39 40
## 5.717424 4.847469 4.004913 4.278915 4.621417 4.758418 3.593911 3.525410
## 41 42 43 44 45 46 47 48
## 4.347416 5.580423 5.237921 4.895419 4.210415 5.306421 5.374922 4.552917
## 49 50 51 52 53 54 55 56
## 5.511923 5.237921 4.415916 5.237921 5.032420 3.799412 4.278915 4.826919
## 57 58 59 60 61 62 63 64
## 5.854425 6.059926 4.758418 5.032420 5.306421 5.717424 4.826919 4.073414
## 65 66 67 68 69 70 71 72
## 4.347416 4.689918 5.648924 4.758418 5.580423 4.963919 5.032420 5.374922
## 73 74 75 76 77 78 79 80
## 5.100920 5.717424 4.415916 4.963919 4.484416 4.826919 4.278915 5.443422
## 81 82 83 84 85 86 87 88
## 5.648924 4.847469 4.415916 4.141914 5.237921 4.552917 5.100920 4.073414
## 89 90 91 92 93 94 95 96
## 3.936413 5.717424 4.963919 4.278915 4.552917 4.073414 3.730912 3.319909
## 97 98 99 100
## 5.717424 4.210415 4.484416 NA
Residuos
# First six residuals of the fitted model.
head(resid(modelo), n = 6L)
## 1 2 3 4 5 6
## 0.4005878 -0.2529168 0.3045811 0.1005878 1.2195769 -0.2899177
# Normal Q-Q plot of the model residuals (visual check of normality).
qqnorm(resid(modelo))
# Shapiro-Wilk normality test on the same residuals.
shapiro.test(resid(modelo))
##
## Shapiro-Wilk normality test
##
## data: resid(modelo)
## W = 0.98515, p-value = 0.3325
# Redraw the scatter plot with the fitted line.
plot(x = hatco$fidelida, y = hatco$satisfac)
abline(reg = modelo)
# Join each observation to its fitted value with a vertical segment, so the
# residuals are visible. Equivalent call without with():
# segments(hatco$fidelida, fitted(modelo), hatco$fidelida, hatco$satisfac)
with(
  hatco,
  segments(x0 = fidelida, y0 = fitted(modelo), x1 = fidelida, y1 = satisfac)
)
# Residuals against fitted values.
plot(x = fitted(modelo), y = resid(modelo))
Banda de confianza
# Fitted values with the lower and upper limits of the confidence interval
# for the mean response, one row per observation.
predict(modelo, interval = "confidence")
## fit lwr upr
## 1 3.799412 3.571263 4.027561
## 2 4.552917 4.424306 4.681528
## 3 4.895419 4.772225 5.018613
## 4 3.799412 3.571263 4.027561
## 5 5.580423 5.380031 5.780815
## 6 4.689918 4.567906 4.811929
## 7 4.758418 4.637529 4.879307
## 8 4.621417 4.496801 4.746033
## 9 5.922925 5.665048 6.180803
## 10 5.306421 5.146011 5.466832
## 11 3.799412 3.571263 4.027561
## 12 4.826919 4.705631 4.948206
## 13 4.278915 4.123089 4.434741
## 14 4.210415 4.045670 4.375159
## 15 5.306421 5.146011 5.466832
## 16 4.963919 4.837379 5.090459
## 17 4.210415 4.045670 4.375159
## 18 4.347416 4.199793 4.495038
## 19 5.306421 5.146011 5.466832
## 20 5.374922 5.205264 5.544580
## 21 4.415916 4.275658 4.556174
## 22 4.004913 3.810147 4.199680
## 23 5.374922 5.205264 5.544580
## 24 4.073414 3.889113 4.257714
## 25 4.963919 4.837379 5.090459
## 26 4.963919 4.837379 5.090459
## 27 4.073414 3.889113 4.257714
## 28 5.306421 5.146011 5.466832
## 29 4.963919 4.837379 5.090459
## 30 4.758418 4.637529 4.879307
## 31 4.552917 4.424306 4.681528
## 32 5.237921 5.086103 5.389740
## 33 5.717424 5.494745 5.940103
## 34 4.847469 4.725765 4.969172
## 35 4.004913 3.810147 4.199680
## 36 4.278915 4.123089 4.434741
## 37 4.621417 4.496801 4.746033
## 38 4.758418 4.637529 4.879307
## 39 3.593911 3.330292 3.857530
## 40 3.525410 3.249642 3.801179
## 41 4.347416 4.199793 4.495038
## 42 5.580423 5.380031 5.780815
## 43 5.237921 5.086103 5.389740
## 44 4.895419 4.772225 5.018613
## 45 4.210415 4.045670 4.375159
## 46 5.306421 5.146011 5.466832
## 47 5.374922 5.205264 5.544580
## 48 4.552917 4.424306 4.681528
## 49 5.511923 5.322196 5.701650
## 50 5.237921 5.086103 5.389740
## 51 4.415916 4.275658 4.556174
## 52 5.237921 5.086103 5.389740
## 53 5.032420 4.901205 5.163635
## 54 3.799412 3.571263 4.027561
## 55 4.278915 4.123089 4.434741
## 56 4.826919 4.705631 4.948206
## 57 5.854425 5.608471 6.100378
## 58 6.059926 5.777748 6.342104
## 59 4.758418 4.637529 4.879307
## 60 5.032420 4.901205 5.163635
## 61 5.306421 5.146011 5.466832
## 62 5.717424 5.494745 5.940103
## 63 4.826919 4.705631 4.948206
## 64 4.073414 3.889113 4.257714
## 65 4.347416 4.199793 4.495038
## 66 4.689918 4.567906 4.811929
## 67 5.648924 5.437531 5.860316
## 68 4.758418 4.637529 4.879307
## 69 5.580423 5.380031 5.780815
## 70 4.963919 4.837379 5.090459
## 71 5.032420 4.901205 5.163635
## 72 5.374922 5.205264 5.544580
## 73 5.100920 4.963837 5.238003
## 74 5.717424 5.494745 5.940103
## 75 4.415916 4.275658 4.556174
## 76 4.963919 4.837379 5.090459
## 77 4.484416 4.350544 4.618289
## 78 4.826919 4.705631 4.948206
## 79 4.278915 4.123089 4.434741
## 80 5.443422 5.263964 5.622881
## 81 5.648924 5.437531 5.860316
## 82 4.847469 4.725765 4.969172
## 83 4.415916 4.275658 4.556174
## 84 4.141914 3.967647 4.316181
## 85 5.237921 5.086103 5.389740
## 86 4.552917 4.424306 4.681528
## 87 5.100920 4.963837 5.238003
## 88 4.073414 3.889113 4.257714
## 89 3.936413 3.730815 4.142011
## 90 5.717424 5.494745 5.940103
## 91 4.963919 4.837379 5.090459
## 92 4.278915 4.123089 4.434741
## 93 4.552917 4.424306 4.681528
## 94 4.073414 3.889113 4.257714
## 95 3.730912 3.491126 3.970697
## 96 3.319909 3.006980 3.632839
## 97 5.717424 5.494745 5.940103
## 98 4.210415 4.045670 4.375159
## 99 4.484416 4.350544 4.618289
## 100 NA NA NA
Banda de predicción
# First rows of the fitted values with the prediction interval limits.
head(predict(modelo, interval = "prediction"))
## fit lwr upr
## 1 3.799412 2.575563 5.023261
## 2 4.552917 3.343663 5.762171
## 3 4.895419 3.686729 6.104109
## 4 3.799412 2.575563 5.023261
## 5 5.580423 4.361444 6.799403
## 6 4.689918 3.481348 5.898487
Representación gráfica de las bandas
# Grid of fidelida values (24 to 66) on which both bands are evaluated.
bandas.frame <- data.frame(fidelida = 24:66)
# Confidence band (bc) and prediction band (bp) on that grid; each result has
# columns fit, lwr and upr.
bc <- predict(modelo, interval = "confidence", newdata = bandas.frame)
bp <- predict(modelo, interval = "prediction", newdata = bandas.frame)
# Widen the y axis to cover the prediction band so it is not clipped.
plot(
  hatco$fidelida, hatco$satisfac,
  ylim = range(hatco$satisfac, bp, na.rm = TRUE)
)
# Fitted line (solid) and confidence limits (dashed).
matlines(bandas.frame$fidelida, bc, lty = c(1, 2, 2), col = "black")
# Prediction limits (dotted); lty 0 leaves the fit column undrawn because the
# fitted line is already on the plot.
matlines(bandas.frame$fidelida, bp, lty = c(0, 3, 3), col = "red")
6.3.2 Correlación
Coeficiente de correlación de Pearson
# Pearson correlation between fidelida and satisfac, using only the rows
# where both values are present.
with(hatco, cor(fidelida, satisfac, use = "complete.obs"))
## [1] 0.712581
# Correlation matrix of columns 6 to 14 of hatco, computed on complete rows.
cor(hatco[, 6:14], use = "complete.obs")
## velocida precio flexprec imgfabri servconj
## velocida 1.00000000 -0.35439461 0.51879732 0.04885481 0.60908594
## precio -0.35439461 1.00000000 -0.48550163 0.27150666 0.51134698
## flexprec 0.51879732 -0.48550163 1.00000000 -0.11472112 0.07496499
## imgfabri 0.04885481 0.27150666 -0.11472112 1.00000000 0.29800272
## servconj 0.60908594 0.51134698 0.07496499 0.29800272 1.00000000
## imgfvent 0.08084452 0.18873090 -0.03801323 0.79015164 0.24641510
## calidadp -0.48984768 0.46822563 -0.44542562 0.19904126 -0.06152068
## fidelida 0.67428681 0.07682487 0.57807750 0.22442574 0.69802972
## satisfac 0.64981476 0.02636286 0.53057615 0.47553688 0.63054720
## imgfvent calidadp fidelida satisfac
## velocida 0.08084452 -0.48984768 0.67428681 0.64981476
## precio 0.18873090 0.46822563 0.07682487 0.02636286
## flexprec -0.03801323 -0.44542562 0.57807750 0.53057615
## imgfabri 0.79015164 0.19904126 0.22442574 0.47553688
## servconj 0.24641510 -0.06152068 0.69802972 0.63054720
## imgfvent 1.00000000 0.18052945 0.26674626 0.34349253
## calidadp 0.18052945 1.00000000 -0.20401261 -0.28687427
## fidelida 0.26674626 -0.20401261 1.00000000 0.71258104
## satisfac 0.34349253 -0.28687427 0.71258104 1.00000000
# Test whether the Pearson correlation between fidelida and satisfac differs
# from zero; also reports a 95% confidence interval.
cor.test(hatco$fidelida, hatco$satisfac)
##
## Pearson's product-moment correlation
##
## data: hatco$fidelida and hatco$satisfac
## t = 10.003, df = 97, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5995024 0.7977691
## sample estimates:
## cor
## 0.712581
El coeficiente de correlación de Spearman es una variante no paramétrica del coeficiente de Pearson:
# Same test using Spearman's rank correlation (rho) instead of Pearson's.
cor.test(hatco$fidelida, hatco$satisfac, method='spearman')
##
## Spearman's rank correlation rho
##
## data: hatco$fidelida and hatco$satisfac
## S = 46601, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.7118039