Post on 05-Jan-2020
transcript
Ejercicio de Regresión — Técnico en Riesgos — September 22, 2017
Cargamos la base de datos
# Reset the workspace, point R at the course folder and load the data set.
# NOTE(review): rm(list = ls()) and setwd() are kept so the script behaves as
# before, but both make it non-portable (hard-coded drive, wiped workspace);
# consider a project-relative path instead.
rm(list = ls())
setwd("G:/UCR TR-ESTADISTICA/Curso 21-07-17/Clase 7")

# The first CSV column is used as the row names of the data frame.
WiscLottery <- read.table(
  "WiscLottery.csv",
  header = TRUE,
  row.names = 1,
  sep = ","
)

# Show the first 10 rows.
head(WiscLottery, 10)
## PERPERHH MEDSCHYR MEDHVL PRCRENT PRC55P HHMEDAGE MEDINC SALES## 53003 3.0 12.6 71.3 21 38 48 54.2 1285.400## 53033 3.2 12.9 98.0 6 28 46 70.7 3571.450## 53038 2.8 12.4 58.7 25 35 45 43.6 2407.037## 53059 3.1 12.5 65.7 24 29 45 51.9 1223.825## 53072 2.6 13.1 96.7 32 27 42 63.1 15046.400## 53083 2.7 12.8 66.4 25 38 48 55.7 9128.725## 53095 2.8 12.9 91.0 31 37 48 54.9 33181.400## 53098 2.9 12.5 61.0 26 40 50 46.9 2242.988## 53104 2.8 12.8 91.5 18 35 48 62.3 21587.800## 53172 2.6 12.7 68.8 37 39 47 49.1 15693.275## POP## 53003 435## 53033 4823## 53038 2469## 53059 2051## 53072 13337## 53083 17004## 53095 38283## 53098 9859## 53104 4464## 53172 20958
Analicemos la base de datos
summary(WiscLottery)
## PERPERHH MEDSCHYR MEDHVL PRCRENT## Min. :2.200 Min. :12.20 Min. : 34.50 Min. : 6.00## 1st Qu.:2.600 1st Qu.:12.50 1st Qu.: 43.77 1st Qu.:19.25## Median :2.700 Median :12.60 Median : 53.90 Median :24.00## Mean :2.706 Mean :12.70 Mean : 57.09 Mean :24.68## 3rd Qu.:2.800 3rd Qu.:12.78 3rd Qu.: 66.47 3rd Qu.:27.00## Max. :3.200 Max. :15.90 Max. :120.00 Max. :62.00
1
## PRC55P HHMEDAGE MEDINC SALES## Min. :25.0 Min. :41.00 Min. :27.90 Min. : 189.0## 1st Qu.:35.0 1st Qu.:46.00 1st Qu.:38.17 1st Qu.: 821.3## Median :40.0 Median :48.00 Median :43.10 Median : 2426.4## Mean :39.7 Mean :48.76 Mean :45.12 Mean : 6494.8## 3rd Qu.:44.0 3rd Qu.:51.00 3rd Qu.:53.62 3rd Qu.:10016.5## Max. :56.0 Max. :59.00 Max. :70.70 Max. :33181.4## POP## Min. : 280## 1st Qu.: 1964## Median : 4406## Mean : 9311## 3rd Qu.:15446## Max. :39098
cor(WiscLottery)
## PERPERHH MEDSCHYR MEDHVL PRCRENT PRC55P## PERPERHH 1.00000000 -0.1889199 0.06791617 -0.3683181 -0.5178883## MEDSCHYR -0.18891986 1.0000000 0.70780357 0.6422949 -0.4792305## MEDHVL 0.06791617 0.7078036 1.00000000 0.3726068 -0.6213228## PRCRENT -0.36831807 0.6422949 0.37260677 1.0000000 -0.3096679## PRC55P -0.51788827 -0.4792305 -0.62132281 -0.3096679 1.0000000## HHMEDAGE -0.47822064 -0.4810156 -0.54337571 -0.3695101 0.9636132## MEDINC 0.33397632 0.5222065 0.84133554 0.2001336 -0.7762837## SALES -0.13625569 0.4674995 0.57150508 0.4964811 -0.3857442## POP -0.18038028 0.5895641 0.52577941 0.6433933 -0.4035664## HHMEDAGE MEDINC SALES POP## PERPERHH -0.4782206 0.3339763 -0.1362557 -0.1803803## MEDSCHYR -0.4810156 0.5222065 0.4674995 0.5895641## MEDHVL -0.5433757 0.8413355 0.5715051 0.5257794## PRCRENT -0.3695101 0.2001336 0.4964811 0.6433933## PRC55P 0.9636132 -0.7762837 -0.3857442 -0.4035664## HHMEDAGE 1.0000000 -0.6809147 -0.3648614 -0.4157242## MEDINC -0.6809147 1.0000000 0.4687388 0.4329918## SALES -0.3648614 0.4687388 1.0000000 0.8862827## POP -0.4157242 0.4329918 0.8862827 1.0000000
cor(WiscLottery,method="kendall")
## PERPERHH MEDSCHYR MEDHVL PRCRENT PRC55P## PERPERHH 1.00000000 -0.04277469 0.08255332 -0.1737501 -0.4191611## MEDSCHYR -0.04277469 1.00000000 0.57966525 0.2924798 -0.3862710## MEDHVL 0.08255332 0.57966525 1.00000000 0.2203177 -0.4686885## PRCRENT -0.17375012 0.29247976 0.22031770 1.0000000 -0.2389852## PRC55P -0.41916112 -0.38627099 -0.46868853 -0.2389852 1.0000000## HHMEDAGE -0.40315765 -0.35376254 -0.41127755 -0.2620309 0.8757712## MEDINC 0.21156612 0.53872100 0.65766877 0.2460877 -0.5989118## SALES -0.10964385 0.46731795 0.46097297 0.4425961 -0.3129062## POP -0.12192396 0.37558517 0.40376001 0.4575993 -0.3046063## HHMEDAGE MEDINC SALES POP## PERPERHH -0.4031576 0.2115661 -0.1096438 -0.1219240## MEDSCHYR -0.3537625 0.5387210 0.4673180 0.3755852
2
## MEDHVL -0.4112776 0.6576688 0.4609730 0.4037600## PRCRENT -0.2620309 0.2460877 0.4425961 0.4575993## PRC55P 0.8757712 -0.5989118 -0.3129062 -0.3046063## HHMEDAGE 1.0000000 -0.5255419 -0.2930468 -0.3015163## MEDINC -0.5255419 1.0000000 0.4272877 0.3897060## SALES -0.2930468 0.4272877 1.0000000 0.7551020## POP -0.3015163 0.3897060 0.7551020 1.0000000
plot(WiscLottery$POP,WiscLottery$SALES)
0 10000 20000 30000 40000
050
0015
000
2500
0
WiscLottery$POP
Wis
cLot
tery
$SA
LES
#SALES: Online lottery sales to individual consumers
#POP: Population, in thousands
Revisemos normalidad
# Draw three normality diagnostics for `var`, one after another: a normal
# Q-Q plot with its reference line, a histogram and a boxplot.
#
# The Q-Q reference line joins the 25th and 75th percentiles of the
# theoretical distribution and of the empirical distribution.
#
# var: the vector of values to inspect.
# Returns whatever boxplot() returns (it is the last call in the body).
normalidad <- function(var) {
  qqnorm(var)
  qqline(var)
  hist(var)
  boxplot(var)
}

normalidad(WiscLottery$SALES)
3
−2 −1 0 1 2
050
0015
000
2500
0Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
4
Histogram of var
var
Fre
quen
cy
0 5000 10000 15000 20000 25000 30000 35000
05
1015
2025
30
5
050
0015
000
2500
0
#Nota qqplot:
#- En el eje X se presentan los cuantiles de la distribución normal.
#- En el eje Y se presentan los cuantiles de la distribución muestral.
#- Entre más cercana la distribución empírica a la normal, más se parece el gráfico a una línea recta.
# Also look at how the POP variable behaves.
normalidad(WiscLottery$POP)
6
−2 −1 0 1 2
010
000
2000
030
000
4000
0Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
7
Histogram of var
var
Fre
quen
cy
0 10000 20000 30000 40000
05
1015
2025
8
010
000
2000
030
000
4000
0
Primer Modelo
# First model: regress SALES on POP, setting aside for now the non-normality
# of the dependent variable.
modelo1 <- lm(SALES ~ POP, data = WiscLottery)
modelo1
#### Call:## lm(formula = SALES ~ POP, data = WiscLottery)#### Coefficients:## (Intercept) POP## 469.7036 0.6471
summary(modelo1)
#### Call:## lm(formula = SALES ~ POP, data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -6046.7 -1460.9 -670.5 485.6 18229.5##
9
## Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) 469.70360 702.90619 0.668 0.507## POP 0.64709 0.04881 13.258 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 3792 on 48 degrees of freedom## Multiple R-squared: 0.7855, Adjusted R-squared: 0.781## F-statistic: 175.8 on 1 and 48 DF, p-value: < 2.2e-16
qnorm(0.025,0,1)
## [1] -1.959964
qnorm(0.975)
## [1] 1.959964
# Compute the fitted values of the first model and overlay them, as a green
# line, on the POP vs SALES scatter plot.
# predict() for lm objects takes `newdata`, not `data`; the old `data =`
# argument was silently swallowed by `...`.
valores.ajustados1 <- predict(modelo1, newdata = WiscLottery)
plot(WiscLottery$POP, WiscLottery$SALES)
lines(WiscLottery$POP, valores.ajustados1, type = "l", col = "green")
0 10000 20000 30000 40000
050
0015
000
2500
0
WiscLottery$POP
Wis
cLot
tery
$SA
LES
10
# Predicted SALES for POP = 50000, built by hand from the fitted intercept
# and slope. The result keeps the "(Intercept)" name of its first operand.
valor.predicho <- coef(modelo1)[1] + coef(modelo1)[2] * 50000
valor.predicho
## (Intercept)## 32824.44
# Residuals of the first model: observed SALES minus fitted values.
residuos1 <- WiscLottery$SALES - valores.ajustados1

# Plot the residuals against their index.
plot(residuos1)
0 10 20 30 40 50
−50
000
5000
1500
0
Index
resi
duos
1
normalidad(residuos1)
11
−2 −1 0 1 2
−50
000
5000
1500
0Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
12
Histogram of var
var
Fre
quen
cy
−10000 −5000 0 5000 10000 15000 20000
05
1015
2025
30
13
−50
000
5000
1500
0
Con transformación logarítmica
#Transformación logarítmica
#La transformación mantiene el orden de los valores, pero acerca los valores extremos de la distribución al resto. Por lo tanto, permite simetrizar distribuciones que son sesgadas o desviadas.
# Natural-log transforms of the response and of the predictor, followed by
# the normality diagnostics for the transformed response.
logSALES <- log(WiscLottery$SALES)
logPOP <- log(WiscLottery$POP)
normalidad(logSALES)
14
−2 −1 0 1 2
67
89
10Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
15
Histogram of var
var
Fre
quen
cy
5 6 7 8 9 10 11
05
1015
16
67
89
10
normalidad(logPOP)
17
−2 −1 0 1 2
67
89
10Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
18
Histogram of var
var
Fre
quen
cy
5 6 7 8 9 10 11
05
1015
19
67
89
10
# Now see what happens to the regression on the log-log scale.
modelo2 <- lm(log(SALES) ~ log(POP), data = WiscLottery)
modelo2
#### Call:## lm(formula = log(SALES) ~ log(POP), data = WiscLottery)#### Coefficients:## (Intercept) log(POP)## -0.7594 1.0285
#Interpretación
#Si la población cambia en un 1%, entonces las ventas cambian aproximadamente en b1% (1.0285%).
summary(modelo1)
#### Call:## lm(formula = SALES ~ POP, data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -6046.7 -1460.9 -670.5 485.6 18229.5##
20
## Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) 469.70360 702.90619 0.668 0.507## POP 0.64709 0.04881 13.258 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 3792 on 48 degrees of freedom## Multiple R-squared: 0.7855, Adjusted R-squared: 0.781## F-statistic: 175.8 on 1 and 48 DF, p-value: < 2.2e-16
summary(modelo2)
#### Call:## lm(formula = log(SALES) ~ log(POP), data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -1.3017 -0.3544 -0.0170 0.3356 2.0964#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) -0.7594 0.5995 -1.267 0.211## log(POP) 1.0285 0.0704 14.609 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 0.6282 on 48 degrees of freedom## Multiple R-squared: 0.8164, Adjusted R-squared: 0.8126## F-statistic: 213.4 on 1 and 48 DF, p-value: < 2.2e-16
# Fitted values of the log-log model, overlaid as a green line on the
# logPOP vs logSALES scatter plot.
# predict() for lm objects takes `newdata`, not `data`; the old `data =`
# argument was silently swallowed by `...`.
valores.ajustados2 <- predict(modelo2, newdata = WiscLottery)
plot(logPOP, logSALES)
lines(logPOP, valores.ajustados2, type = "l", col = "green")
21
6 7 8 9 10
67
89
10
logPOP
logS
ALE
S
# Residuals of the log-log model. resid() has no `data` argument (the one
# previously passed was ignored), so it is dropped here.
residuos2 <- resid(modelo2)

# Plot the residuals against their index.
plot(residuos2)
22
0 10 20 30 40 50
−1.
00.
01.
02.
0
Index
resi
duos
2
normalidad(residuos2)
23
−2 −1 0 1 2
−1.
00.
01.
02.
0Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
24
Histogram of var
var
Fre
quen
cy
−1 0 1 2
05
1015
25
−1.
00.
01.
02.
0
MULTIPLE
#Regresión lineal múltiple
#Ahora intentemos probar algunas otras especificaciones. Para esto reproduzcamos nuevamente la matriz de correlaciones y veamos cuáles son las variables que se encuentran más correlacionadas con la variable dependiente:
# Correlation matrix of every column in the data set (cor() default method),
# stored for the plot that follows and printed.
correlacion <- cor(WiscLottery)
correlacion
## PERPERHH MEDSCHYR MEDHVL PRCRENT PRC55P## PERPERHH 1.00000000 -0.1889199 0.06791617 -0.3683181 -0.5178883## MEDSCHYR -0.18891986 1.0000000 0.70780357 0.6422949 -0.4792305## MEDHVL 0.06791617 0.7078036 1.00000000 0.3726068 -0.6213228## PRCRENT -0.36831807 0.6422949 0.37260677 1.0000000 -0.3096679## PRC55P -0.51788827 -0.4792305 -0.62132281 -0.3096679 1.0000000## HHMEDAGE -0.47822064 -0.4810156 -0.54337571 -0.3695101 0.9636132## MEDINC 0.33397632 0.5222065 0.84133554 0.2001336 -0.7762837## SALES -0.13625569 0.4674995 0.57150508 0.4964811 -0.3857442## POP -0.18038028 0.5895641 0.52577941 0.6433933 -0.4035664## HHMEDAGE MEDINC SALES POP## PERPERHH -0.4782206 0.3339763 -0.1362557 -0.1803803## MEDSCHYR -0.4810156 0.5222065 0.4674995 0.5895641## MEDHVL -0.5433757 0.8413355 0.5715051 0.5257794## PRCRENT -0.3695101 0.2001336 0.4964811 0.6433933## PRC55P 0.9636132 -0.7762837 -0.3857442 -0.4035664
26
## HHMEDAGE 1.0000000 -0.6809147 -0.3648614 -0.4157242## MEDINC -0.6809147 1.0000000 0.4687388 0.4329918## SALES -0.3648614 0.4687388 1.0000000 0.8862827## POP -0.4157242 0.4329918 0.8862827 1.0000000
# Draw a plot to visualise these correlations.
# Run once if the package is missing (the package name must be quoted):
# install.packages("corrplot", dependencies = TRUE)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.2
corrplot(correlacion)
−1
−0.8
−0.6
−0.4
−0.2
0
0.2
0.4
0.6
0.8
1
PE
RP
ER
HH
ME
DS
CH
YR
ME
DH
VL
PR
CR
EN
T
PR
C55
P
HH
ME
DA
GE
ME
DIN
C
SA
LES
PO
P
PERPERHH
MEDSCHYR
MEDHVL
PRCRENT
PRC55P
HHMEDAGE
MEDINC
SALES
POP
# Now fit a model that uses every other column of the data set as a
# predictor of SALES.
modelo3 <- lm(SALES ~ ., data = WiscLottery)
modelo3
#### Call:## lm(formula = SALES ~ ., data = WiscLottery)#### Coefficients:## (Intercept) PERPERHH MEDSCHYR MEDHVL PRCRENT## 46978.4789 -2819.6542 -3761.7376 150.3915 -58.9039## PRC55P HHMEDAGE MEDINC POP## -239.0666 307.9818 -87.3349 0.6645
27
summary(modelo3)
#### Call:## lm(formula = SALES ~ ., data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -6825.1 -1910.8 50.1 1099.8 14012.8#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) 4.698e+04 3.267e+04 1.438 0.1581## PERPERHH -2.820e+03 4.034e+03 -0.699 0.4885## MEDSCHYR -3.762e+03 1.661e+03 -2.265 0.0289 *## MEDHVL 1.504e+02 6.599e+01 2.279 0.0279 *## PRCRENT -5.890e+01 9.393e+01 -0.627 0.5341## PRC55P -2.391e+02 3.391e+02 -0.705 0.4849## HHMEDAGE 3.080e+02 5.307e+02 0.580 0.5649## MEDINC -8.733e+01 1.342e+02 -0.651 0.5189## POP 6.645e-01 6.693e-02 9.929 1.81e-12 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 3580 on 41 degrees of freedom## Multiple R-squared: 0.8367, Adjusted R-squared: 0.8048## F-statistic: 26.25 on 8 and 41 DF, p-value: 8.407e-14
# Fit a more parsimonious model: keep the variables that were highly
# correlated with SALES and were also significant in modelo3.
modelo4 <- lm(SALES ~ MEDSCHYR + MEDHVL + POP, data = WiscLottery)
modelo4
#### Call:## lm(formula = SALES ~ MEDSCHYR + MEDHVL + POP, data = WiscLottery)#### Coefficients:## (Intercept) MEDSCHYR MEDHVL POP## 41804.5804 -3830.0366 127.5152 0.6483
summary(modelo4)
#### Call:## lm(formula = SALES ~ MEDSCHYR + MEDHVL + POP, data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -6588.6 -1727.7 31.3 1365.2 14246.1#### Coefficients:## Estimate Std. Error t value Pr(>|t|)
28
## (Intercept) 4.180e+04 1.583e+04 2.641 0.01125 *## MEDSCHYR -3.830e+03 1.355e+03 -2.826 0.00695 **## MEDHVL 1.275e+02 3.862e+01 3.302 0.00186 **## POP 6.483e-01 5.592e-02 11.594 3e-15 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 3445 on 46 degrees of freedom## Multiple R-squared: 0.8303, Adjusted R-squared: 0.8193## F-statistic: 75.05 on 3 and 46 DF, p-value: < 2.2e-16
# Analyse the residuals of modelo4. resid() has no `data` argument (the one
# previously passed was ignored), so it is dropped here.
residuos4 <- resid(modelo4)
plot(residuos4)
0 10 20 30 40 50
−50
000
5000
1000
0
Index
resi
duos
4
normalidad(residuos4)
29
−2 −1 0 1 2
−50
000
5000
1000
0Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
30
Histogram of var
var
Fre
quen
cy
−10000 −5000 0 5000 10000 15000
05
1015
20
31
−50
000
5000
1000
0
# Last attempt: the three-predictor model with the response and every
# predictor on the log scale.
modelo5 <- lm(
  log(SALES) ~ log(MEDSCHYR) + log(MEDHVL) + log(POP),
  data = WiscLottery
)
modelo5
#### Call:## lm(formula = log(SALES) ~ log(MEDSCHYR) + log(MEDHVL) + log(POP),## data = WiscLottery)#### Coefficients:## (Intercept) log(MEDSCHYR) log(MEDHVL) log(POP)## 2.2401 -2.6201 1.1306 0.9257
summary(modelo5)
#### Call:## lm(formula = log(SALES) ~ log(MEDSCHYR) + log(MEDHVL) + log(POP),## data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -1.12554 -0.31837 -0.02683 0.36063 1.53409#### Coefficients:
32
## Estimate Std. Error t value Pr(>|t|)## (Intercept) 2.24006 6.32159 0.354 0.72470## log(MEDSCHYR) -2.62010 2.83569 -0.924 0.36032## log(MEDHVL) 1.13062 0.40015 2.826 0.00696 **## log(POP) 0.92569 0.08028 11.531 3.62e-15 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 0.5898 on 46 degrees of freedom## Multiple R-squared: 0.8449, Adjusted R-squared: 0.8348## F-statistic: 83.53 on 3 and 46 DF, p-value: < 2.2e-16
# Same log-log specification without log(MEDSCHYR).
modelo6 <- lm(log(SALES) ~ log(MEDHVL) + log(POP), data = WiscLottery)
modelo6
#### Call:## lm(formula = log(SALES) ~ log(MEDHVL) + log(POP), data = WiscLottery)#### Coefficients:## (Intercept) log(MEDHVL) log(POP)## -3.5045 0.9339 0.9109
summary(modelo6)
#### Call:## lm(formula = log(SALES) ~ log(MEDHVL) + log(POP), data = WiscLottery)#### Residuals:## Min 1Q Median 3Q Max## -1.10887 -0.30880 -0.00857 0.37102 1.61193#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) -3.50450 1.14208 -3.069 0.00357 **## log(MEDHVL) 0.93385 0.33824 2.761 0.00820 **## log(POP) 0.91088 0.07854 11.598 2.17e-15 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 0.5889 on 47 degrees of freedom## Multiple R-squared: 0.842, Adjusted R-squared: 0.8353## F-statistic: 125.3 on 2 and 47 DF, p-value: < 2.2e-16
# Residuals of modelo6. resid() has no `data` argument (the one previously
# passed was ignored), so it is dropped here.
residuos6 <- resid(modelo6)
plot(residuos6)
33
0 10 20 30 40 50
−1.
00.
00.
51.
01.
5
Index
resi
duos
6
normalidad(residuos6)
34
−2 −1 0 1 2
−1.
00.
00.
51.
01.
5Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
35
Histogram of var
var
Fre
quen
cy
−1.5 −1.0 −0.5 0.0 0.5 1.0 1.5 2.0
05
1015
36
−1.
00.
00.
51.
01.
5
Series de tiempo
# Time series ----
# NOTE(review): clearing the workspace is kept from the original script; it
# removes every object created in the regression section above.
rm(list = ls())

# Install the package (run once if it is missing).
# install.packages("forecast", dependencies = TRUE)

# Load the package, hiding any warnings raised while attaching it.
suppressWarnings(library(forecast))

# Take a first look at the data.
plot(AirPassengers)
37
Time
AirP
asse
nger
s
1950 1952 1954 1956 1958 1960
100
200
300
400
500
600
# Decompose the series with decompose() and list the components it returns.
# The original call passed the whole default vector
# c("additive", "multiplicative"), which resolves to its first element;
# "additive" is now spelled out so the choice is explicit.
Descomposicion <- decompose(AirPassengers, type = "additive", filter = NULL)
names(Descomposicion)
## [1] "x" "seasonal" "trend" "random" "figure" "type"
plot(Descomposicion$trend)
38
Time
Des
com
posi
cion
$tre
nd
1950 1952 1954 1956 1958 1960
150
250
350
450
plot(Descomposicion$seasonal)
39
Time
Des
com
posi
cion
$sea
sona
l
1950 1952 1954 1956 1958 1960
−40
020
4060
plot(Descomposicion$random)
40
Time
Des
com
posi
cion
$ran
dom
1950 1952 1954 1956 1958 1960
−40
−20
020
4060
# 2 x 2 panel: the raw series, its trend and seasonal components, and a
# boxplot of the raw values.
par(mfrow = c(2, 2))
plot(
  AirPassengers,
  main = "Número de pasajeros",
  xlab = "Años",
  ylab = "Pasajeros transportados"
)
plot(Descomposicion$trend, main = "Tendencia")
# Title typo fixed ("Estacionaliedad" -> "Estacionalidad").
plot(Descomposicion$seasonal, main = "Estacionalidad")
boxplot(AirPassengers, main = "Boxplot")
41
Número de pasajeros
Años
Pas
ajer
os tr
ansp
orta
dos
1950 1954 1958
100
400
Tendencia
Time
Des
com
posi
cion
$tre
nd
1950 1954 1958
150
350
Estacionaliedad
Time
Des
com
posi
cion
$sea
sona
l
1950 1954 1958
−40
20
100
400
Boxplot
# Correlogram: go back to a single-panel layout and plot the autocorrelation
# function up to lag 25.
par(mfrow = c(1, 1))
Acf(AirPassengers, lag.max = 25, main = "ACF")
42
−0.
20.
20.
40.
60.
8
Lag
AC
FACF
6 12 18 24
# First model: ARIMA(1,1,1) with no seasonal terms.
modelo1 <- Arima(AirPassengers, order = c(1, 1, 1), seasonal = c(0, 0, 0))
summary(modelo1)
## Series: AirPassengers## ARIMA(1,1,1)#### Coefficients:## ar1 ma1## -0.4741 0.8634## s.e. 0.1159 0.0720#### sigma^2 estimated as 975.8: log likelihood=-694.34## AIC=1394.68 AICc=1394.86 BIC=1403.57#### Training set error measures:## ME RMSE MAE MPE MAPE MASE## Training set 1.9209 30.91125 24.12176 0.4150742 8.566115 0.7530918## ACF1## Training set 0.03749257
# First projection: plot the forecast from modelo1 with horizon h = 20 in a
# single panel.
par(mfrow = c(1, 1))
plot(forecast(modelo1, h = 20))
43
Forecasts from ARIMA(1,1,1)
1950 1952 1954 1956 1958 1960 1962
100
300
500
700
# Second model: let auto.arima() choose the orders, with seasonal terms
# allowed and the search bounds and starting orders given below.
modelo2 <- auto.arima(
  AirPassengers,
  d = NA, D = NA,
  max.p = 5, max.q = 5, max.P = 2, max.Q = 2, max.order = 5,
  max.d = 2, max.D = 1,
  start.p = 2, start.q = 2, start.P = 1, start.Q = 1,
  stationary = FALSE, seasonal = TRUE
)
summary(modelo2)
## Series: AirPassengers## ARIMA(2,1,1)(0,1,0)[12]#### Coefficients:## ar1 ar2 ma1## 0.5960 0.2143 -0.9819## s.e. 0.0888 0.0880 0.0292#### sigma^2 estimated as 132.3: log likelihood=-504.92## AIC=1017.85 AICc=1018.17 BIC=1029.35#### Training set error measures:## ME RMSE MAE MPE MAPE MASE## Training set 1.3423 10.84619 7.86754 0.420698 2.800458 0.245628## ACF1## Training set -0.00124847
# Second projection: plot the forecast from modelo2 with horizon h = 20.
plot(forecast(modelo2, h = 20))
44
Forecasts from ARIMA(2,1,1)(0,1,0)[12]
1950 1952 1954 1956 1958 1960 1962
100
300
500
700
45