read file
# Load the car data set from the working directory and inspect the columns
# exactly as read, before any type conversion.
auto <- read.csv(file = "car.csv")
summary(object = auto)
## buying maint doors persons
## Length:1728 Length:1728 Length:1728 Length:1728
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## lug_boot safety car_accept
## Length:1728 Length:1728 Length:1728
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# All seven columns are read as character and hold category labels, so
# convert every one of them to a factor.
# doors and persons were previously passed through as.numeric(), which
# turned their non-numeric labels into NA (432 and 576 rows respectively)
# and raised "NAs introduced by coercion" warnings; as factors no rows are
# lost.
categorical_cols <- c(
  "buying", "maint", "doors", "persons",
  "lug_boot", "safety", "car_accept"
)
auto[categorical_cols] <- lapply(auto[categorical_cols], as.factor)
summary(auto)
## NOTE(review): re-run to refresh the recorded summary. doors and persons
## should now be reported as per-level counts with no NA's; the counts for
## buying, maint, lug_boot, safety and car_accept are not affected by this
## change.
# Cross-tabulate luggage-boot size (rows) against car acceptability (columns).
auto_freq <- table(auto[["lug_boot"]], auto[["car_accept"]])
Conditional percentage, my way
# Percentage of each luggage-boot size within every acceptability class:
# each column of auto_freq is divided by its own total and scaled to 100.
# Rows keep the luggage-boot labels; columns are the acceptability classes.
accept_levels <- c('acc', 'good', 'unacc', 'vgood')
percentage_auto_freq <- vapply(
  accept_levels,
  function(level) auto_freq[, level] / sum(auto_freq[, level]) * 100,
  numeric(nrow(auto_freq))
)
colnames(percentage_auto_freq) <- colnames(auto_freq)
percentage_auto_freq
## acc good unacc vgood
## big 37.50000 34.78261 30.41322 61.53846
## med 35.15625 34.78261 32.39669 38.46154
## small 27.34375 30.43478 37.19008 0.00000
Conditional percentage, teacher's way
# Express each element of X as a percentage of the total of X.
# X: numeric vector (or table column) of counts.
# Returns a vector the same length as X.
cond_percent <- function(X) {
  total <- sum(X)
  share <- X / total
  share * 100
}
# Apply cond_percent to every column (MARGIN = 2) of the contingency table,
# giving the luggage-boot percentages within each acceptability class.
percentage_auto_freq <- apply(
  X = auto_freq,
  MARGIN = 2,
  FUN = cond_percent
)
percentage_auto_freq
##
## acc good unacc vgood
## big 37.50000 34.78261 30.41322 61.53846
## med 35.15625 34.78261 32.39669 38.46154
## small 27.34375 30.43478 37.19008 0.00000
library(RColorBrewer)
# Palette used only by the commented-out alternative plot below.
coul <- brewer.pal(5, "Set2")
# Switch to a 1x2 layout for this figure and remember the previous settings.
# par() changes persist on the open device, so without restoring them the
# next plot would be drawn in the unused second panel.
old_par <- par(mfrow = c(1, 2))
#barplot(t(auto_freq),legend.text = TRUE,xlab = 'Lug Boot', ylab = 'Frequency' ,main = 'Luggage Boot by Car Accept', #col=coul ,beside = TRUE)
# Grouped bars: one group per acceptability class, one bar per luggage-boot
# size. The legend labels come from the table's own row names so they cannot
# drift from the data.
barplot(
  auto_freq,
  xlab = 'Car Accept',
  ylab = 'Frequency',
  main = 'Car Accept by Luggage Boot',
  beside = TRUE,
  legend.text = rownames(auto_freq),
  col = c('lightblue', 'pink', 'lightgreen')
)
par(old_par)

# Print the raw luggage-boot (rows) by acceptability (columns) counts that
# feed the test below.
auto_freq
##
## acc good unacc vgood
## big 144 24 368 40
## med 135 24 392 25
## small 105 21 450 0
# Pearson chi-squared test on the contingency table, testing whether
# luggage-boot size and acceptability are independent.
chisq.test(auto_freq)
##
## Pearson's Chi-squared test
##
## data: auto_freq
## X-squared = 53.282, df = 6, p-value = 1.029e-09
Movies
# Load the movies data set from the working directory and inspect the
# columns as read.
movies <- read.csv(file = "movies.csv")
summary(object = movies)
## Movie LeadStudio RottenTomatoes AudienceScore
## Length:612 Length:612 Min. : 0.00 Min. :19.00
## Class :character Class :character 1st Qu.:26.00 1st Qu.:48.00
## Mode :character Mode :character Median :47.50 Median :60.00
## Mean :49.14 Mean :60.41
## 3rd Qu.:72.00 3rd Qu.:73.00
## Max. :99.00 Max. :96.00
## Story Genre TheatersOpenWeek OpeningWeekend
## Length:612 Length:612 Min. : 2 Min. : 0.032
## Class :character Class :character 1st Qu.:2424 1st Qu.: 8.360
## Mode :character Mode :character Median :2858 Median : 14.780
## Mean :2717 Mean : 22.558
## 3rd Qu.:3332 3rd Qu.: 27.762
## Max. :4468 Max. :169.190
## BOAvgOpenWeekend DomesticGross ForeignGross WorldGross
## Min. : 1003 Min. : 0.97 Min. : 0.01 Min. : 4.677
## 1st Qu.: 3828 1st Qu.: 25.03 1st Qu.: 16.82 1st Qu.: 43.610
## Median : 5978 Median : 44.47 Median : 46.66 Median : 91.375
## Mean : 8046 Mean : 73.58 Mean : 96.94 Mean : 170.377
## 3rd Qu.: 9715 3rd Qu.: 94.97 3rd Qu.: 103.43 3rd Qu.: 203.060
## Max. :93230 Max. :760.50 Max. :2021.00 Max. :2781.500
## Budget Profitability OpenProfit Year
## Min. : 0.5 Min. : 18.17 Min. : 0.34 Min. :2007
## 1st Qu.: 20.0 1st Qu.: 150.79 1st Qu.: 21.90 1st Qu.:2008
## Median : 38.5 Median : 253.78 Median : 37.41 Median :2009
## Mean : 57.2 Mean : 355.40 Mean : 56.39 Mean :2009
## 3rd Qu.: 75.0 3rd Qu.: 394.61 3rd Qu.: 59.18 3rd Qu.:2010
## Max. :300.0 Max. :6694.40 Max. :1368.00 Max. :2011
# Set column types in two passes: grouping columns become factors, all
# measurement columns are coerced to numeric. Movie is left unchanged.
factor_cols <- c("LeadStudio", "Story", "Genre", "Year")
numeric_cols <- c(
  "RottenTomatoes", "AudienceScore", "TheatersOpenWeek", "OpeningWeekend",
  "BOAvgOpenWeekend", "DomesticGross", "ForeignGross", "WorldGross",
  "Budget", "Profitability", "OpenProfit"
)
movies[factor_cols] <- lapply(movies[factor_cols], as.factor)
movies[numeric_cols] <- lapply(movies[numeric_cols], as.numeric)
# Scatter plot of RottenTomatoes (x) against AudienceScore (y), followed by
# the correlation between the two scores.
plot(x = movies$RottenTomatoes, y = movies$AudienceScore)

cor(x = movies$AudienceScore, y = movies$RottenTomatoes)
## [1] 0.6881901
# Simple linear regression of AudienceScore on RottenTomatoes.
# The columns are referenced by bare name with `data = movies` instead of
# `movies$...` inside the formula: the fitted terms then carry plain column
# names and predict() can be given a `newdata` frame. The estimates are
# unchanged; only the call and coefficient labels differ.
model <- lm(AudienceScore ~ RottenTomatoes, data = movies)
model
##
## Call:
## lm(formula = AudienceScore ~ RottenTomatoes, data = movies)
##
## Coefficients:
## (Intercept) RottenTomatoes
## 39.5875 0.4238
# Redraw the scatter plot and overlay the fitted regression line.
plot(x = movies$RottenTomatoes, y = movies$AudienceScore)
abline(reg = model)

library(MASS)
# Residual diagnostics for `model`.
residual <- resid(model)         # raw residuals
stu.residual <- studres(model)   # studentized residuals (MASS::studres)
## Studentized residual plot
plot(
  movies$RottenTomatoes, stu.residual,
  main = "Studentized Residual Plot",
  xlab = 'Rotten Tomatoes',
  ylab = 'Residual'
)
## Reference lines: solid blue at 0, dashed red at -2 and +2.
## A separate abline(h = 0) is not needed; it would draw a default black
## line over the blue zero line.
abline(
  h = c(-2, 0, 2),
  col = c("red", "blue", "red"),
  lty = c(2, 1, 2),
  lwd = c(3, 2, 3)
)

## histogram of studentized residual
hist(stu.residual)

ERTL COMPANY
# Counts for the three columns A, B and C; the three rows are labelled
# 'Minor Defect', 'Major Defect' and 'Good' further down in this file.
A <- c(15, 15, 20)
B <- c(13, 11, 25)
C <- c(21, 13, 12)
company <- data.frame(A = A, B = B, C = C)
# Working copy that the totals are added to.
company_total <- company
add total by column
# Helper: total of a vector.
l <- function(x) {
  sum(x)
}
# Append the per-column totals as a fourth row and label all four rows.
company_total <- rbind(
  company,
  vapply(company, l, numeric(1))
)
row.names(company_total) <- c('Minor Defect', 'Major Defect', 'Good', 'Total')
company_total
add total by row
# Helper: total of a vector.
l <- function(x) {
  sum(x)
}
# Append the per-row totals as a fourth column, then name the columns.
company_total <- cbind(
  company_total,
  apply(X = company_total, MARGIN = 1, FUN = l)
)
colnames(company_total) <- c('A', 'B', 'C', 'Total')
company_total
# Row totals of the raw counts, attached as a 'Total' column; company_total
# is rebuilt here from `company`, replacing the table built earlier.
Total <- rowSums(company)
company_total <- cbind(company, Total)
# Each row's share of the overall total, as a percentage.
total <- company_total[["Total"]]
total / sum(total) * 100
## [1] 33.79310 26.89655 39.31034
#total
# Label the rows, print the table, then run the chi-squared test on it.
rownames(company) <- c('Minor Defect', 'Major Defect', 'Good')
company
chisq.test(x = company) # Chi-square Test
##
## Pearson's Chi-squared test
##
## data: company
## X-squared = 7.2248, df = 4, p-value = 0.1245