Read data from a URL

# Read the data file CH01TA01.txt directly from the URL below.
# sep = "" splits fields on any run of whitespace; header = FALSE treats the
# first line as data, so the columns receive the default names V1 and V2.
toluca <- read.table(
  file = "http://www.cnachtsheim-text.csom.umn.edu/Kutner/Chapter%20%201%20Data%20Sets/CH01TA01.txt",
  header = FALSE,
  sep = ""
)


# Preview the first six rows of the freshly read data
head(toluca, n = 6L)
  V1  V2
1 80 399
2 30 121
3 50 221
4 90 376
5 70 361
6 60 224

Rename columns

# Give the two columns descriptive names: first lot size, then work hours
names(toluca) <- c("lotSize", "hours")

# Preview the first six rows again to confirm the new column names
head(toluca, n = 6L)
  lotSize hours
1      80   399
2      30   121
3      50   221
4      90   376
5      70   361
6      60   224

Making \(X\) and \(Y\) matrices

library(Matrix)
# Number of observations = number of rows in the data frame
n <- dim(toluca)[1L]
n
[1] 25
# Pull the predictor (lot size) and the response (hours) out as plain vectors
X <- toluca[["lotSize"]]
Y <- toluca[["hours"]]

# Turn the response into a one-column matrix so it can be used in matrix
# products. This is your vector Y.
Y <- as.matrix(Y)
Y
      [,1]
 [1,]  399
 [2,]  121
 [3,]  221
 [4,]  376
 [5,]  361
 [6,]  224
 [7,]  546
 [8,]  352
 [9,]  353
[10,]  157
[11,]  160
[12,]  252
[13,]  389
[14,]  113
[15,]  435
[16,]  420
[17,]  212
[18,]  268
[19,]  377
[20,]  421
[21,]  273
[22,]  468
[23,]  244
[24,]  342
[25,]  323
# Turn the predictor into a one-column matrix. This is not quite the X matrix
# we need yet: the column of ones for the intercept is still missing.
X <- as.matrix(X)
X
      [,1]
 [1,]   80
 [2,]   30
 [3,]   50
 [4,]   90
 [5,]   70
 [6,]   60
 [7,]  120
 [8,]   80
 [9,]  100
[10,]   50
[11,]   40
[12,]   70
[13,]   90
[14,]   20
[15,]  110
[16,]  100
[17,]   30
[18,]   50
[19,]   90
[20,]  110
[21,]   30
[22,]   90
[23,]   40
[24,]   80
[25,]   70
# Prepend a column of ones to make the X matrix; the scalar 1 is recycled
# down every row of X. This is your X matrix.
X <- cbind(1, X)
X
      [,1] [,2]
 [1,]    1   80
 [2,]    1   30
 [3,]    1   50
 [4,]    1   90
 [5,]    1   70
 [6,]    1   60
 [7,]    1  120
 [8,]    1   80
 [9,]    1  100
[10,]    1   50
[11,]    1   40
[12,]    1   70
[13,]    1   90
[14,]    1   20
[15,]    1  110
[16,]    1  100
[17,]    1   30
[18,]    1   50
[19,]    1   90
[20,]    1  110
[21,]    1   30
[22,]    1   90
[23,]    1   40
[24,]    1   80
[25,]    1   70

Find \(X'X\)

# X': the transpose of the X matrix (rows and columns swapped)
Xt <- t(X)
Xt
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
[1,]    1    1    1    1    1    1    1    1    1     1     1     1     1     1
[2,]   80   30   50   90   70   60  120   80  100    50    40    70    90    20
     [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
[1,]     1     1     1     1     1     1     1     1     1     1     1
[2,]   110   100    30    50    90   110    30    90    40    80    70
# X'X in a single call: crossprod(X) is the same product as t(X) %*% X
XtX <- crossprod(X)
XtX
     [,1]   [,2]
[1,]   25   1750
[2,] 1750 142300

Find \(X'Y\)

# X'Y in a single call: crossprod(X, Y) is the same product as t(X) %*% Y
XtY <- crossprod(X, Y)
XtY
       [,1]
[1,]   7807
[2,] 617180

Find \((X'X)^{-1}\)

# Invert X'X; solve() called with a single matrix returns that matrix's inverse
XtXinv <- solve(XtX)
XtXinv
             [,1]          [,2]
[1,]  0.287474747 -3.535354e-03
[2,] -0.003535354  5.050505e-05

Find \(\mathbf{b} = (X'X)^{-1}X'Y\)

# Least-squares coefficients b = (X'X)^{-1} X'Y, obtained by solving the normal
# equations (X'X) b = X'Y directly. solve(XtX, XtY) evaluates the same formula
# as XtXinv %*% Xt %*% Y, but it reuses the X'Y product already computed and
# does not rely on an explicitly inverted matrix, which is numerically safer.
b <- solve(XtX, XtY)
b
          [,1]
[1,] 62.365859
[2,]  3.570202

Verify the answer by finding the LS estimates using the lm() function

# Cross-check: fit the same simple linear regression with lm(). The Estimate
# column of the summary should match the two entries of b.
toluca_LS_model <- lm(formula = hours ~ lotSize, data = toluca)
summary(toluca_LS_model)

Call:
lm(formula = hours ~ lotSize, data = toluca)

Residuals:
    Min      1Q  Median      3Q     Max 
-83.876 -34.088  -5.982  38.826 103.528 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   62.366     26.177   2.382   0.0259 *  
lotSize        3.570      0.347  10.290 4.45e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 48.82 on 23 degrees of freedom
Multiple R-squared:  0.8215,    Adjusted R-squared:  0.8138 
F-statistic: 105.9 on 1 and 23 DF,  p-value: 4.449e-10

Section 5.11: Fitted Values and Residuals

# Fitted values: Yhat = X b
Yhat <- X %*% b
Yhat
##           [,1]
##  [1,] 347.9820
##  [2,] 169.4719
##  [3,] 240.8760
##  [4,] 383.6840
##  [5,] 312.2800
##  [6,] 276.5780
##  [7,] 490.7901
##  [8,] 347.9820
##  [9,] 419.3861
## [10,] 240.8760
## [11,] 205.1739
## [12,] 312.2800
## [13,] 383.6840
## [14,] 133.7699
## [15,] 455.0881
## [16,] 419.3861
## [17,] 169.4719
## [18,] 240.8760
## [19,] 383.6840
## [20,] 455.0881
## [21,] 169.4719
## [22,] 383.6840
## [23,] 205.1739
## [24,] 347.9820
## [25,] 312.2800
# Residuals: observed response minus fitted values, e = Y - Yhat
e <- Y - Yhat
e
##              [,1]
##  [1,]  51.0179798
##  [2,] -48.4719192
##  [3,] -19.8759596
##  [4,]  -7.6840404
##  [5,]  48.7200000
##  [6,] -52.5779798
##  [7,]  55.2098990
##  [8,]   4.0179798
##  [9,] -66.3860606
## [10,] -83.8759596
## [11,] -45.1739394
## [12,] -60.2800000
## [13,]   5.3159596
## [14,] -20.7698990
## [15,] -20.0880808
## [16,]   0.6139394
## [17,]  42.5280808
## [18,]  27.1240404
## [19,]  -6.6840404
## [20,] -34.0880808
## [21,] 103.5280808
## [22,]  84.3159596
## [23,]  38.8260606
## [24,]  -5.9820202
## [25,]  10.7200000

Uses of inverse matrix

Consider the simultaneous equations: \[2y_1+4y_2 =20\] \[3y_1+y_2=10\] Using matrix methods, find the solutions for \(y_1\) and \(y_2\).

library(Matrix)
# Coefficient matrix of the system, built one equation (row) at a time.
# NOTE: this reuses the name X, replacing the regression X matrix built earlier.
X <- rbind(c(2, 4), c(3, 1))
X
     [,1] [,2]
[1,]    2    4
[2,]    3    1
# Right-hand side of the system as a 2 x 1 column matrix
Z <- cbind(c(20, 10))
Z
     [,1]
[1,]   20
[2,]   10
# Inverse of the coefficient matrix X (solve() with one argument inverts it)
invX <- solve(X)
invX
     [,1] [,2]
[1,] -0.1  0.4
[2,]  0.3 -0.2
# Solution of the system: premultiply the right-hand side Z by the inverse of X.
# NOTE: the result is stored in Y, replacing the regression response vector.
Y <- invX %*% Z
Y
     [,1]
[1,]    2
[2,]    4

\(y_1=2\) and \(y_2 =4\)