Samsung S3 Activity prediction

We will try to predict the activity the user is performing from the phone's accelerometer data. There are six possible activities: laying, sitting, standing, walk, walkdown and walkup.

Some preliminary plots to check the data

# Load the e1071 package, which is used below for svm()
library(e1071)
## Loading required package: class

# Load the data - it has been downloaded and prepared in advance.
# The rest of the script expects this to provide the `samsungData` object.
load("samsungData.rda")

# Focus on max acceleration: columns 10 to 12 hold the X, Y and Z maxima
names(samsungData)[10:12]
## [1] "tBodyAcc-max()-X" "tBodyAcc-max()-Y" "tBodyAcc-max()-Z"

# Rows recorded for subject 1; the preliminary plots only show this subject
subject1 <- samsungData$subject == 1

# Convert the activity to a numeric code so it can be used as a plot colour
numericActivity <- as.numeric(as.factor(samsungData$activity))[subject1]

# Plot the max acceleration along X and Y, coloured by activity
par(mfrow = c(2, 1))
plot(samsungData[subject1, 10], pch = 19, col = numericActivity,
    ylab = names(samsungData)[10])
# Take the legend labels from the same rows as the colours (subject 1 only)
# so that label i and colour i always refer to the same activity, even if
# the full data set lists the activities in a different order.
legend("topleft", legend = unique(samsungData$activity[subject1]),
    col = unique(numericActivity), pch = 19)
plot(samsungData[subject1, 11], pch = 19, col = numericActivity,
    ylab = names(samsungData)[11])

plot of chunk unnamed-chunk-1

Use SVM for predicting based on the max acceleration X, Y and Z

# Split the data: the last block of rows is held out for testing, everything
# before it is used for training. Predictors are the three max-acceleration
# columns (10 to 12); the class label is read from column 563.
# NOTE(review): row (samsungData_size - 100) belongs to both index ranges, so
# the test set has 101 rows and shares one row with the training set. The
# split is reused further down, so any change must be made everywhere at once.
samsungData_size <- nrow(samsungData)
train_rows <- seq(1, samsungData_size - 100)
test_rows <- seq(samsungData_size - 100, samsungData_size)
max_acc_cols <- 10:12

samsungData_train <- samsungData[train_rows, max_acc_cols]
samsungData_classes_train <- samsungData[train_rows, 563]
samsungData_test <- samsungData[test_rows, max_acc_cols]
samsungData_classes_test <- samsungData[test_rows, 563]

# Fit a linear-kernel SVM classifier on the training rows
model <- svm(x = samsungData_train, y = samsungData_classes_train,
    type = "C", kernel = "linear")

# Predict the held-out rows and cross-tabulate against the true activities
pred <- predict(model, newdata = samsungData_test)
table(predicted = pred, observed = samsungData_classes_test)
##           observed
## predicted  laying walk walkdown walkup
##   laying        3    0        0      0
##   sitting       0    0        0      0
##   standing      8    0        0      0
##   walk          0   13        0      3
##   walkdown      0    0       27      2
##   walkup        0   19        1     25

The prediction is poor; let's try to improve it.

# To bring more variables into the model, run a Singular Value Decomposition
# on the scaled measurements of subject 1 (columns 562 and 563 are left out)
# and look for the maximum contributor (variance).
subject1 <- samsungData$subject == 1
svd1 <- svd(scale(samsungData[subject1, -c(562, 563)]))

# Plot the first two left singular vectors, coloured by activity
par(mfrow = c(2, 1))
plot(svd1$u[, 1], col = numericActivity, pch = 19)
# Legend labels are taken from the subject 1 rows, the same rows the colour
# codes come from, so that each label is paired with its own colour.
legend("topleft", legend = unique(samsungData$activity[subject1]),
    col = unique(numericActivity), pch = 19)
plot(svd1$u[, 2], col = numericActivity, pch = 19)

plot of chunk unnamed-chunk-3

# Index of the variable with the largest weight in the second right singular
# vector, then the names of that column and the two columns just before it
second_right_vector <- svd1$v[, 2]
maxContrib <- which.max(second_right_vector)
names(samsungData)[seq(maxContrib - 2, maxContrib)]
## [1] "fBodyAcc-meanFreq()-X" "fBodyAcc-meanFreq()-Y" "fBodyAcc-meanFreq()-Z"

# Train again, adding the maximum contributor and the two columns before it
# to the three max-acceleration columns. The row split is the same as for the
# first model, whose class labels are reused here.
# NOTE(review): row (samsungData_size - 100) is in both the training and the
# test range, so one test row is also seen during training.
feature_cols <- c(10:12, seq(maxContrib - 2, maxContrib))
train_rows <- seq(1, samsungData_size - 100)
test_rows <- seq(samsungData_size - 100, samsungData_size)

samsungData_train <- samsungData[train_rows, feature_cols]
samsungData_test <- samsungData[test_rows, feature_cols]

# Refit the linear SVM on the six predictors and evaluate on the held-out rows
model <- svm(x = samsungData_train, y = samsungData_classes_train,
    type = "C", kernel = "linear")
pred <- predict(model, newdata = samsungData_test)
table(predicted = pred, observed = samsungData_classes_test)
##           observed
## predicted  laying walk walkdown walkup
##   laying       11    0        0      0
##   sitting       0    0        0      0
##   standing      0    0        0      0
##   walk          0   19        0      0
##   walkdown      0    2       28      0
##   walkup        0   11        0     30