diff --git a/blogContent/posts/data-science/html/IntroToR.html b/blogContent/posts/data-science/html/IntroToR.html new file mode 100644 index 0000000..8509946 --- /dev/null +++ b/blogContent/posts/data-science/html/IntroToR.html @@ -0,0 +1,1293 @@ + + + +
+ +Jeffery Russell
9-30-19
Using R Markdown, you can easily create reports and presentations by embedding your code directly in the report. +This has major advantages.
+ +summary(cars)  # per-column min, quartiles, median, mean and max of the cars data
+
+
+ speed dist
+ Min. : 4.0 Min. : 2.00
+ 1st Qu.:12.0 1st Qu.: 26.00
+ Median :15.0 Median : 36.00
+ Mean :15.4 Mean : 42.98
+ 3rd Qu.:19.0 3rd Qu.: 56.00
+ Max. :25.0 Max. :120.00
+
+
+# Scatter plot of mtcars$wt against mtcars$mpg with a title and axis labels.
+plot(
+  mtcars$wt,
+  mtcars$mpg,
+  main = "Weight vs MPG",
+  xlab = "weight",
+  ylab = "MPG"
+)
+
+
+
+
+# Classify x by its sign and print a matching label.
+x <- 0
+if (x < 0) {
+  print("Negative number")
+} else if (x > 0) {
+  print("Positive number")
+} else {
+  # Neither below nor above zero.
+  print("Zero")
+}
+
+
+[1] "Zero"
+
+
+# Print each integer from 1 through 5.
+for (i in seq_len(5)) {
+  print(i)
+}
+
+
+[1] 1
+[1] 2
+[1] 3
+[1] 4
+[1] 5
+
+
+# Arithmetic on a range is vectorized: doubling 1..5 yields 2, 4, 6, 8, 10.
+for (i in 2 * (1:5)) {
+  print(i)
+}
+
+
+[1] 2
+[1] 4
+[1] 6
+[1] 8
+[1] 10
+
+
+# Loop while x equals 2. The body increments x to 3, so it runs exactly once.
+x <- 2
+
+while (x == 2) {
+  print("Stonks")
+  # Use `<-` for assignment, consistent with the rest of the examples.
+  x <- x + 1
+}
+
+
+[1] "Stonks"
+
+
+Arrays (vectors) in R are 1-indexed.
+ +# Iterate directly over the elements of a vector.
+for (i in c(1, 4, 5)) {
+  print(i)
+}
+
+
+[1] 1
+[1] 4
+[1] 5
+
+
+# Indexing starts at 1, so ar[2] is the second element.
+ar <- c(1, 3, 9)
+print(ar[2])
+
+
+[1] 3
+
+
+Using the built-in help command, you can view the documentation for any function.
+ +help(plot)  # look up the documentation for plot()
+
+
+# Plot y = x^2 for x = 1..10 with labelled axes and a title.
+plot(
+  x = 1:10,
+  y = (1:10)^2,
+  main = "Ez",
+  xlab = "x",
+  ylab = "y"
+)
+
+
+
+
+# Scatter plot of sepal length against petal length, coloured by species.
+ggplot(data = iris) +
+  geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
+  ggtitle("Iris Flower Set") +
+  # The colour legend shows Species, so label it accordingly.
+  labs(x = "Sepal Length", y = "Petal Length", color = "Species") +
+  # theme_bw() is a complete theme and resets every theme element, so the
+  # title-centering tweak must be added after it to take effect.
+  theme_bw() +
+  theme(plot.title = element_text(hjust = 0.5))
+
+
+
+
+# Horizontal box plot of petal length for each species, outliers drawn in red.
+ggplot(data = iris) +
+  geom_boxplot(
+    mapping = aes(y = Petal.Length, x = Species),
+    outlier.colour = "red",
+    outlier.shape = 1
+  ) +
+  ggtitle("Iris Flower Length") +
+  labs(x = "Flower Type", y = "Petal Length") +
+  # Flip the axes so the boxes run horizontally.
+  coord_flip() +
+  # theme_bw() is a complete theme and resets every theme element, so the
+  # title-centering tweak must be added after it to take effect.
+  theme_bw() +
+  theme(plot.title = element_text(hjust = 0.5))
+
+
+
+
+# Connect to a local Spark instance and copy the iris data frame into it.
+sc <- spark_connect(master = "local")
+iris_tbl <- sdf_copy_to(sc, iris, name = "iris_tbl", overwrite = TRUE)
+
+# Split the table 70/30 into training and test sets using a fixed seed.
+partitions <- sdf_partition(iris_tbl, training = 0.7, test = 0.3, seed = 1111)
+iris_training <- partitions$training
+iris_test <- partitions$test
+
+# Fit a decision tree that predicts Species from all other columns.
+dt_model <- ml_decision_tree(iris_training, Species ~ .)
+
+# Predict on the held-out rows and score the predictions.
+pred <- ml_predict(dt_model, iris_test)
+ml_multiclass_classification_evaluator(pred)
+
+
+[1] 0.9451737
+
+
+Visualize the built-in “mpg” dataset from the tidyverse library.
+ +Step 1: Install the tidyverse package in R and load it in your R script
+ +# Download and install the tidyverse package
+install.packages("tidyverse")
+
+# Attach tidyverse so its functions and data are available in this script
+library("tidyverse")
+
+
+
+
+# Base-graphics scatter plot of mpg$displ (engine size) against mpg$hwy.
+plot(
+  x = mpg$displ,
+  y = mpg$hwy,
+  main = "Engine Size(Liters) vs MPG"
+)
+
+
+
+
+# The same scatter plot in ggplot2: displ on the x-axis, hwy on the y-axis.
+ggplot(mpg) +
+  geom_point(aes(x = displ, y = hwy))
+
+
+
+
+# Map the class column to point colour.
+ggplot(mpg) +
+  geom_point(aes(x = displ, y = hwy, color = class))
+
+
+
+
+# Map the class column to point shape instead of colour.
+ggplot(mpg) +
+  geom_point(aes(x = displ, y = hwy, shape = class))
+
+
+
+
+# Draw one panel per class, arranged in three rows.
+ggplot(mpg) +
+  geom_point(aes(x = displ, y = hwy)) +
+  facet_wrap(~ class, nrow = 3)
+
+
+
+
+# Share x and y across both layers; only the points are coloured by class,
+# so the smoother is fitted once over all of the data.
+ggplot(mpg, aes(x = displ, y = hwy)) +
+  geom_point(aes(color = class)) +
+  geom_smooth()
+
+
+
+
+# With colour in the plot-level aesthetics, both layers inherit it, so the
+# smoother is drawn separately for each class.
+ggplot(mpg, aes(displ, hwy, color = class)) +
+  geom_point() +
+  geom_smooth()
+
+
+
+
+