MIS 207

Introduction to R Language
Basic Data Types and Structures


I. Ozkan, PhD

Professor
MIS
Cankaya University

iozkan@cankaya.edu.tr

Fall 2025

Reference

Additional

Quick Refresher from Previous Classes

✅ Tidyverse style guide

✅ Frequently used [some] functions

✅ Data Types in R

❌ Data Creation in R

R Objects

R is a Vector Language: Sample Output

# Create a numeric vector with c(); `<-` is the idiomatic assignment operator
x_vec <- c(1, 2.2, -3, 4, 0.5) # some values
x_vec
## [1]  1.0  2.2 -3.0  4.0  0.5
is.vector(x_vec)
## [1] TRUE
x_vec > 0 # element-wise comparison: which elements of x_vec are larger than 0
## [1]  TRUE  TRUE FALSE  TRUE  TRUE
is.vector(x_vec > 0) # the comparison result is itself a (logical) vector
## [1] TRUE
sum(1, 2) # 1 + 2
## [1] 3
is.vector(sum(1, 2)) # even a single number is a vector of length 1
## [1] TRUE
sum(x_vec > 0) # TRUE counts as 1, so this counts how many elements are > 0
## [1] 4
sum(x_vec < 0) # likewise, how many elements are < 0
## [1] 1

R is a Vector Language: Types and Attributes


Source: The content and image are from Hadley Wickham’s Advanced R: Chapter 3 on Vectors

R: Atomic Vectors

dim(x_vec) 
## NULL

An atomic vector has a dim of NULL, which distinguishes it from a 1-D array 😲!!!

Atomic Vectors: Making Longer Vectors

dbl_var <- c(1, 2.5, 4.5)
int_var <- c(1L, 6L, 10L)
lgl_var <- c(TRUE, FALSE)
chr_var <- c("these are", "some strings")


- You can determine the type of a vector with typeof() and its length with length()

typeof(dbl_var)
#> [1] "double"
typeof(int_var)
#> [1] "integer"
typeof(lgl_var)
#> [1] "logical"
typeof(chr_var)
#> [1] "character"
c(c(1,2), c(3,4))
## [1] 1 2 3 4

Missing Values

# NA ("not available") is contagious: most computations involving a
# missing value return another missing value
NA > 5
#> [1] NA
10 * NA
#> [1] NA
!NA
#> [1] NA
# Exceptions: the result is known whenever it would be the same for every
# possible value the NA could stand for
NA^0
#> [1] 1
NA | TRUE
#> [1] TRUE
NA & FALSE
#> [1] FALSE
x <- c(NA, 5, NA, 10)
# Comparing with == NA always yields NA, so it cannot detect missing values
x == NA
#> [1] NA NA NA NA
# Use is.na() to test for missingness instead
is.na(x)
#> [1]  TRUE FALSE  TRUE FALSE

Names

# When creating it: 
x <- c(a = 1, b = 2, c = 3)
x
#> a b c 
#> 1 2 3

# By assigning a character vector to names()
x <- 1:3
names(x) <- c("a", "b", "c")
x
#> a b c 
#> 1 2 3

# Inline, with setNames():
x <- setNames(1:3, c("a", "b", "c"))
x
#> a b c 
#> 1 2 3

Summary

Source: The content and image are from isa401: An Undergrad Course on Business Intelligence & Data Visualization, Introduction to R as of July 22, 2025

Data Types: Formal Definitions

Data Structures: ➡️ 2D: Matrices

Matrices

# Build a 2 x 2 integer matrix; as the printed result shows, the values
# fill the matrix column by column
x_mat <- matrix(1:4, nrow = 2, ncol = 2)
str(x_mat) # its structure?
##  int [1:2, 1:2] 1 2 3 4
x_mat # printing it nicely
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
print("-----------------")
## [1] "-----------------"
x_mat[1, 2] # subsetting: [row, column] #<<
## [1] 3

Matrices

# 3 x 4 character matrix built from the first 12 lowercase letters;
# as the printed result shows, it is filled column by column
x_char <- matrix(letters[1:12], nrow = 3, ncol = 4)
x_char
##      [,1] [,2] [,3] [,4]
## [1,] "a"  "d"  "g"  "j" 
## [2,] "b"  "e"  "h"  "k" 
## [3,] "c"  "f"  "i"  "l"
x_char[1:2, 2:3] # subsetting: rows 1-2 of columns 2-3 #<<
##      [,1] [,2]
## [1,] "d"  "g" 
## [2,] "e"  "h"

Matrices: matrix() and dim()

m <- matrix(1:6, nrow = 2, ncol = 3) 
m
#>      [,1] [,2] [,3]
#> [1,]    1    3    5
#> [2,]    2    4    6

The dim() function can also create a matrix directly from a vector by assigning a dimension attribute to it

m <- 1:10 
m
##  [1]  1  2  3  4  5  6  7  8  9 10
dim(m) <- c(2, 5)
m
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10

Matrices: rbind() and cbind()

rbind(), cbind(): these functions create a matrix by row-binding or column-binding vectors, respectively

x <- 1:3
y <- 10:12
cbind(x, y)
##      x  y
## [1,] 1 10
## [2,] 2 11
## [3,] 3 12
rbind(x, y) 
##   [,1] [,2] [,3]
## x    1    2    3
## y   10   11   12

Matrices: t()

t(): transpose of a matrix

x <- matrix(1:8, nrow = 4)
x
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8
t(x)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
t(t(x))
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8
z <- 1:4
t(z)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
t(t(z))
##      [,1]
## [1,]    1
## [2,]    2
## [3,]    3
## [4,]    4

Matrices: rownames(), colnames(): Row names and Column names of a matrix

x
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8
rownames(x)
## NULL
colnames(x)
## NULL
rownames(x) <- paste0("row_",1:4)
colnames(x) <- paste0("col_",1:2)

rownames(x)
## [1] "row_1" "row_2" "row_3" "row_4"
colnames(x)
## [1] "col_1" "col_2"
t(x)
##       row_1 row_2 row_3 row_4
## col_1     1     2     3     4
## col_2     5     6     7     8
rownames(x) <- colnames(x) <- NULL
x
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8

Lists

lst <- list( # list constructor/creator
  1:3, # atomic integer vector of length = 3 #<< 
  "a", # atomic character vector of length = 1 (aka scalar) #<< 
  c(TRUE, FALSE, TRUE), # atomic logical vector of length = 3 #<< 
  c(2.3, 5.9) # atomic double/numeric vector of length = 2 #<< 
)
lst # printing the list
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "a"
## 
## [[3]]
## [1]  TRUE FALSE  TRUE
## 
## [[4]]
## [1] 2.3 5.9

Lists

x <- list(1, "a", TRUE, 1 + 4i) 
x
## [[1]]
## [1] 1
## 
## [[2]]
## [1] "a"
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] 1+4i
y <- list(a=1, b="a", c=TRUE, d=1 + 4i) 
y
## $a
## [1] 1
## 
## $b
## [1] "a"
## 
## $c
## [1] TRUE
## 
## $d
## [1] 1+4i

Lists

x <- vector("list", length = 5)
x
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL

Lists: A brief Summary

lst <- list( # list constructor/creator
  1:3, # atomic integer vector of length = 3 #<< 
  "a", # atomic character vector of length = 1 (aka scalar) #<< 
  c(TRUE, FALSE, TRUE), # atomic logical vector of length = 3 #<< 
  c(2.3, 5.9) # atomic double/numeric vector of length = 2 #<< 
)
lst # printing the list
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "a"
## 
## [[3]]
## [1]  TRUE FALSE  TRUE
## 
## [[4]]
## [1] 2.3 5.9
str(lst) # structure: one line per element, showing its type and values
## List of 4
##  $ : int [1:3] 1 2 3
##  $ : chr "a"
##  $ : logi [1:3] TRUE FALSE TRUE
##  $ : num [1:2] 2.3 5.9
lst[1] # subsetting with [ returns a list holding the first element
## [[1]]
## [1] 1 2 3
lst[[1]] # subsetting with [[ extracts the first element itself
## [1] 1 2 3
paste(lst) # use paste(): each element is turned into a single string
## [1] "1:3"                  "a"                    "c(TRUE, FALSE, TRUE)"
## [4] "c(2.3, 5.9)"

Factors

x <- factor(c("yes", "yes", "no", "yes", "no")) 
x
## [1] yes yes no  yes no 
## Levels: no yes
as.character(x)
## [1] "yes" "yes" "no"  "yes" "no"
table(x) 
## x
##  no yes 
##   2   3
levels(x) ## Levels are put in alphabetical order
## [1] "no"  "yes"
x <- factor(c("yes", "yes", "no", "yes", "no"),
           levels = c("yes", "no"))
x
## [1] yes yes no  yes no 
## Levels: yes no
as.character(x)
## [1] "yes" "yes" "no"  "yes" "no"

Data Frames

Data Frames

# Build a data frame with an integer column `x` and a logical column `z`.
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
x <- data.frame(x = 1:4, z = c(TRUE, TRUE, FALSE, FALSE))
x
##   x     z
## 1 1  TRUE
## 2 2  TRUE
## 3 3 FALSE
## 4 4 FALSE
str(x) # structure: one line per column
## 'data.frame':    4 obs. of  2 variables:
##  $ x: int  1 2 3 4
##  $ z: logi  TRUE TRUE FALSE FALSE
nrow(x) # number of rows (observations)
## [1] 4
ncol(x) # number of columns (variables)
## [1] 2

Data Frames: Subsetting

x
##   x     z
## 1 1  TRUE
## 2 2  TRUE
## 3 3 FALSE
## 4 4 FALSE
x$x
## [1] 1 2 3 4
x[,1]
## [1] 1 2 3 4
x$z 
## [1]  TRUE  TRUE FALSE FALSE
x[,2]
## [1]  TRUE  TRUE FALSE FALSE
x[1,]
##   x    z
## 1 1 TRUE
x[1:2,1]
## [1] 1 2

Data Frames: Subsetting

Example: Edgar Anderson’s Iris Data


Sepal.Length Sepal.Width Petal.Length Petal.Width
setosa
5.1 3.5 1.4 0.2
4.9 3.0 1.4 0.2
4.7 3.2 1.3 0.2
versicolor
7.0 3.2 4.7 1.4
6.4 3.2 4.5 1.5
6.9 3.1 4.9 1.5
virginica
6.3 3.3 6.0 2.5
5.8 2.7 5.1 1.9
7.1 3.0 5.9 2.1

Data Frames: Subsetting

Example: Edgar Anderson’s Iris Data: first 6 rows

head(iris) # first 6 rows 
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Data Frames: Subsetting

Example: Edgar Anderson’s Iris Data: last 6 rows

tail(iris) # last 6 rows 
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

Data Frames: Subsetting

Example: rows 111 through 116 (all columns)

# [row numbers,]  
iris[111:116,] # rows 111 through 116 (all columns) 
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 111          6.5         3.2          5.1         2.0 virginica
## 112          6.4         2.7          5.3         1.9 virginica
## 113          6.8         3.0          5.5         2.1 virginica
## 114          5.7         2.5          5.0         2.0 virginica
## 115          5.8         2.8          5.1         2.4 virginica
## 116          6.4         3.2          5.3         2.3 virginica

Data Frames: Subsetting

Example: accessing columns

# [,] : by indexing
head(iris[,1]) # first column   
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
# [,"column name"] by column name   
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
head(iris[,"Sepal.Length"]) # Sepal.Length column   
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
all.equal(head(iris[,1]), head(iris[,"Sepal.Length"]))
## [1] TRUE
head(iris[["Sepal.Length"]]) # Sepal.Length column   
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
all.equal(head(iris[,1]), head(iris[["Sepal.Length"]]))
## [1] TRUE
head(iris["Sepal.Length"]) # Sepal.Length column returning data frame 
##   Sepal.Length
## 1          5.1
## 2          4.9
## 3          4.7
## 4          4.6
## 5          5.0
## 6          5.4

Data Frames: Subsetting by Filtering

Example: column values > some value

head(iris$Sepal.Length > 7.5) # comparison returns TRUE/FALSE
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
sum(iris$Sepal.Length > 7.5) # number of TRUE 
## [1] 6
iris[iris$Sepal.Length > 7.5,] # filter based on Sepal.Length
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 106          7.6         3.0          6.6         2.1 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 132          7.9         3.8          6.4         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
iris[iris$Sepal.Length > 7.5,c(2,5)] # filter and select column(s)
##     Sepal.Width   Species
## 106         3.0 virginica
## 118         3.8 virginica
## 119         2.6 virginica
## 123         2.8 virginica
## 132         3.8 virginica
## 136         3.0 virginica
# more complex filtering (using AND)
iris[iris$Sepal.Length > 7.5 & iris$Sepal.Width < 3.5, ] 
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 106          7.6         3.0          6.6         2.1 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
tail(iris$Petal.Length)
## [1] 5.7 5.2 5.0 5.2 5.4 5.1

Data Frames: Subsetting by Filtering

What really happens here?

x <- c(1, 3, NA, 6, -5, -0.5) 
x 
## [1]  1.0  3.0   NA  6.0 -5.0 -0.5
x[c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE)] # subset using logical values
## [1]  1.0  3.0   NA -0.5
x < 4 # check NA: comparing a missing value gives NA, not TRUE/FALSE
## [1]  TRUE  TRUE    NA FALSE  TRUE  TRUE
# an NA in the logical index produces an NA in the result
x[x < 4]
## [1]  1.0  3.0   NA -5.0 -0.5

Data Frames: Column selecting by comparison

Example: column select

x <- colnames(iris) # iris column names 
x  
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
head(iris[,c(TRUE, FALSE, FALSE, FALSE, FALSE)]) # first column
## [1] 5.1 4.9 4.7 4.6 5.0 5.4
head(iris[,colnames(iris)==x[1]]) # first column 
## [1] 5.1 4.9 4.7 4.6 5.0 5.4

Filtering and Selecting: Better way (Teaser)

library(dplyr)
# filter() keeps the rows for which the condition is TRUE. It uses data
# masking, so refer to the column by its bare name rather than iris$...
x <- filter(iris, Sepal.Length > 7.5)
x
##   Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 1          7.6         3.0          6.6         2.1 virginica
## 2          7.7         3.8          6.7         2.2 virginica
## 3          7.7         2.6          6.9         2.3 virginica
## 4          7.7         2.8          6.7         2.0 virginica
## 5          7.9         3.8          6.4         2.0 virginica
## 6          7.7         3.0          6.1         2.3 virginica
# select() keeps only the named columns
select(x, Sepal.Width, Species)
##   Sepal.Width   Species
## 1         3.0 virginica
## 2         3.8 virginica
## 3         2.6 virginica
## 4         2.8 virginica
## 5         3.8 virginica
## 6         3.0 virginica

Ordering Data Frame

# order data frame by column 

iris[1:6,] # first 6 obs.  
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Ordered - ascending by Sepal.Length
iris[order(iris$Sepal.Length),][1:6,] 
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 14          4.3         3.0          1.1         0.1  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 39          4.4         3.0          1.3         0.2  setosa
## 43          4.4         3.2          1.3         0.2  setosa
## 42          4.5         2.3          1.3         0.3  setosa
## 4           4.6         3.1          1.5         0.2  setosa
# descending by Sepal.Length
iris[order(-iris$Sepal.Length),][1:6,]  
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 132          7.9         3.8          6.4         2.0 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
## 106          7.6         3.0          6.6         2.1 virginica
# by Sepal.Length (descending) and Petal.Length (ascending)
iris[order(-iris$Sepal.Length,iris$Petal.Length),][1:6,] 
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 132          7.9         3.8          6.4         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
## 118          7.7         3.8          6.7         2.2 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 106          7.6         3.0          6.6         2.1 virginica
# for vectors see sort() function 

Data Frame: Renaming Columns and Rows

colnames(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
rownames(iris)[1:5]
## [1] "1" "2" "3" "4" "5"
colnames(iris) <- c("L.Sepal","W.Sepal","L.Petal","W.Petal","Species")
colnames(iris)
## [1] "L.Sepal" "W.Sepal" "L.Petal" "W.Petal" "Species"
# if one column name is not specified 
colnames(iris) <- c("L.Sepal","W.Sepal","L.Petal","W.Petal")
colnames(iris)
## [1] "L.Sepal" "W.Sepal" "L.Petal" "W.Petal" NA
# back to original data 
data(iris) 
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
# only first and second column names 
colnames(iris)[1:2] <- c("L.Sepal","W.Sepal")
colnames(iris)
## [1] "L.Sepal"      "W.Sepal"      "Petal.Length" "Petal.Width"  "Species"
# rownames(iris) <- iris$Species
# Error in `.rowNamesDF<-`(x, value = value) :
#  duplicate 'row.names' are not allowed

rownames(iris) <- paste0(iris$Species,"_",rownames(iris))
head(rownames(iris))
## [1] "setosa_1" "setosa_2" "setosa_3" "setosa_4" "setosa_5" "setosa_6"
tail(rownames(iris))
## [1] "virginica_145" "virginica_146" "virginica_147" "virginica_148"
## [5] "virginica_149" "virginica_150"

Data Frame: Adding Column(s)

data(iris)

# adding a column, new.column with values Sepal.Length * Sepal.Width
iris$new.column <- iris$Sepal.Length * iris$Sepal.Width
head(iris[, c("Sepal.Length", "Sepal.Width", "new.column")])
##   Sepal.Length Sepal.Width new.column
## 1          5.1         3.5      17.85
## 2          4.9         3.0      14.70
## 3          4.7         3.2      15.04
## 4          4.6         3.1      14.26
## 5          5.0         3.6      18.00
## 6          5.4         3.9      21.06
# assume sepal size defined as large if new.column > median
# ifelse function - teaser
cat("Median of Sepal.Size: ", median(iris$new.column))
## Median of Sepal.Size:  17.66
# ifelse() is vectorized: it picks "Large" or "Small" row by row
iris$Sepal.Size <- ifelse(
  iris$new.column > median(iris$new.column),
  "Large",
  "Small"
)
head(iris[, c("Sepal.Length", "Sepal.Width", "new.column", "Sepal.Size")])
##   Sepal.Length Sepal.Width new.column Sepal.Size
## 1          5.1         3.5      17.85      Large
## 2          4.9         3.0      14.70      Small
## 3          4.7         3.2      15.04      Small
## 4          4.6         3.1      14.26      Small
## 5          5.0         3.6      18.00      Large
## 6          5.4         3.9      21.06      Large
# manually
iris$Sepal.Size2 <- "Small" # initialization: every row starts as "Small"
# overwrite only the rows whose new.column is above the median
iris$Sepal.Size2[iris$new.column > median(iris$new.column)] <- "Large"
head(iris[, c("new.column", "Sepal.Size", "Sepal.Size2")])
##   new.column Sepal.Size Sepal.Size2
## 1      17.85      Large       Large
## 2      14.70      Small       Small
## 3      15.04      Small       Small
## 4      14.26      Small       Small
## 5      18.00      Large       Large
## 6      21.06      Large       Large

Data Frame: Adding Row(s)

# original
data(iris)
tail(iris, 2)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# adding a row (here species is important, it is a factor)
# Build the new row as a one-row data frame so each value keeps its type.
# Binding c(5, 4, 7, 6, "virginica") instead would first turn every value
# into a string and silently convert the numeric columns to character.
new_row <- data.frame(
  Sepal.Length = 5,
  Sepal.Width = 4,
  Petal.Length = 7,
  Petal.Width = 6,
  Species = factor("virginica", levels = levels(iris$Species))
)
iris <- rbind(iris, new_row)
tail(iris, 3)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
## 151          5.0         4.0          7.0         6.0 virginica

Data Frame: Removing Column(s)

# original
data(iris)
tail(iris,2)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# removing a column  
iris$Species <- NULL
tail(iris, 2)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width
## 149          6.2         3.4          5.4         2.3
## 150          5.9         3.0          5.1         1.8
# removing multiple columns
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"
# chained assignment: NULL is assigned to both columns, dropping each of them
iris$Sepal.Length <- iris$Petal.Length <- NULL
tail(iris, 2)
##     Sepal.Width Petal.Width
## 149         3.4         2.3
## 150         3.0         1.8

Data Frame: Removing Row(s)

# original
data(iris)
tail(iris, 2)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# removing a row 
iris <- iris[-150,] # remove 150^th row 
tail(iris, 3) 
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
# remove multiple rows 
head(iris,3)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
iris <- iris[-c(1,3,4),]
head(iris,3)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 2          4.9         3.0          1.4         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# remove rows based on a filter
iris[iris$Sepal.Length > 7.6, ] # the rows that are about to be removed
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 118          7.7         3.8          6.7         2.2 virginica
## 119          7.7         2.6          6.9         2.3 virginica
## 123          7.7         2.8          6.7         2.0 virginica
## 132          7.9         3.8          6.4         2.0 virginica
## 136          7.7         3.0          6.1         2.3 virginica
# Keep the rows where the condition is NOT met. The parentheses make it
# explicit that `!` negates the whole comparison, not just the column.
iris <- iris[!(iris$Sepal.Length > 7.6), ] # remove obs
iris[iris$Sepal.Length > 7.5, ]
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 106          7.6           3          6.6         2.1 virginica

Pipes

The pipe passes the object on its left-hand side to the first argument of the function on its right-hand side.

x %>% f(1)   = f(x, 1)  
x %>% f(1, .)= f(1, x)  

df %>% split(.$var) = split(df, df$var)  

df %>% {split(.$x, .$y)} = split(df$x, df$y)  

df %>% .$var = df$var   

# no parentheses required
x %>% mean  
df %>% 
  .$var %>% 
  mean 

x |> f(1) = f(x, 1)  
x |> f(1, y = _) = f(1, y=x)  

# parentheses always necessary
x |> mean()


# Good 
flights |>  
  filter(!is.na(arr_delay), !is.na(tailnum)) |> 
  count(dest)

# Avoid
flights|>filter(!is.na(arr_delay), !is.na(tailnum))|>count(dest)

Data Frame ➡️ Tibble

library(tibble)
# see ?tibble::tibble

dept <- c('MIS', 'ECON', 'SENG', 'CENG', 'MAN')
some_numbers <- c(18L, 19L, 14L, 25L, 22L)

fsb_tbl <- tibble(
  department = dept, 
  count = some_numbers, 
  percentage = count / sum(count))
fsb_tbl 
## # A tibble: 5 × 3
##   department count percentage
##   <chr>      <int>      <dbl>
## 1 MIS           18      0.184
## 2 ECON          19      0.194
## 3 SENG          14      0.143
## 4 CENG          25      0.255
## 5 MAN           22      0.224

Data Frame ➡️ Tibble

as.data.frame(fsb_tbl) 
##   department count percentage
## 1        MIS    18  0.1836735
## 2       ECON    19  0.1938776
## 3       SENG    14  0.1428571
## 4       CENG    25  0.2551020
## 5        MAN    22  0.2244898

Data Structures: So What is a Tibble Anyway?

Tibble is a modern reimagining of the data frame. Tibbles are designed to be (as much as possible) drop-in replacements for data frames that fix those frustrations. A concise, and fun, way to summarise the main differences is that tibbles are lazy and surly: they do less and complain more. – Hadley Wickham

To learn more about the basics of tibble, please consult the reference below: