Commit
Changed CR/LF (Windows) to CR in order to make it work with Rj on Windows
sjentsch committed May 28, 2024
1 parent f590b78 commit 5a3d123
Showing 3 changed files with 98 additions and 98 deletions.
54 changes: 27 additions & 27 deletions Factor analysis/Syntax_CheckCorrelations.R
@@ -1,27 +1,27 @@
# calculate a correlation for all possible combinations of variables
# ===========================================================================
# It is important that you adjust [2:26] in the command underneath to the
# number of columns in your data set; in the bfi_sample dataset (that the
# code was written for), the first column is "ID" (which we don't want to
# include in the calculation), followed by the variables "A1" to "O5"
# (which we need). Given that "A1" to "O5" are 25 variables and that we
# exclude the first column ("ID"), we have to calculate correlations for
# the 2nd to the 26th column, i.e., [2:26]
# ===========================================================================
# the following line outputs the column number and the variable name
sprintf("%d: %s", seq(ncol(data)), names(data))
# this information can be used to adjust [2:26] underneath
crrMtx = abs(cor(sapply(data[2:26], jmvcore::toNumeric), use = "pairwise"))
# the correlation with itself is always 1, we don't want to include that
diag(crrMtx) <- NA
# check for correlations above 0.3 - the count should be larger than 0
# for each variable; depending on the number of variables and the number
# of assumed factors, a rule of thumb is that it is still acceptable if
# fewer than ~10% of the variables show such low correlations
print('Number of correlations above 0.3 - should be more than 0')
sort(apply(crrMtx > 0.3, 2, sum, na.rm = TRUE))
# check for correlations above 0.9 - the count should be 0 throughout
print('Number of correlations above 0.9 - should be 0 for all')
sort(apply(crrMtx > 0.9, 2, sum, na.rm = TRUE))
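As a side note (not part of this commit), the same matrix can be used to list which variable pairs actually exceed the 0.9 cut-off; this is a minimal sketch that assumes crrMtx exists from the code above:

# sketch (not from the original script): list the variable pairs whose
# absolute correlation exceeds 0.9 (upper triangle only, so each pair
# is reported once)
highPairs <- which(crrMtx > 0.9, arr.ind = TRUE)
highPairs <- highPairs[highPairs[, 1] < highPairs[, 2], , drop = FALSE]
data.frame(var1 = rownames(crrMtx)[highPairs[, 1]],
           var2 = colnames(crrMtx)[highPairs[, 2]],
           r    = crrMtx[highPairs])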
38 changes: 19 additions & 19 deletions Regression analysis/Syntax_Outliers_Mahalanobis.R
@@ -1,19 +1,19 @@
# this list should contain the names of your INDEPENDENT VARIABLES
# you should not include your dependent variables
VL = c('dan.sleep', 'baby.sleep', 'day')
# the lines underneath calculate the Mahalanobis distance and check
# whether it is above the critical chi-square value; if the Mahalanobis
# distance is above the critical value, names(which(...)) will output
# the row number
outRow <- names(which(
mahalanobis(data[, VL], colMeans(data[, VL]), cov(data[, VL])) >
qchisq(p = 0.001, df = length(VL), lower.tail = FALSE)))
# the lines underneath check whether rows with outliers were found:
# if so, a string that can be used as a filter expression is output,
# otherwise a notice that no outliers were found is shown
if (length(outRow) > 0) {
# add this output to a filter to exclude these rows
cat(paste0(c("", paste0("ROW() != ", outRow)), collapse = " and "))
} else {
cat("There were no outliers found.")
}
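For reference (not part of this commit), here is the same check written out step by step, which can be easier to debug; data and VL are assumed to be defined as above:

# sketch (not from the original script): step-by-step outlier check
D2   <- mahalanobis(data[, VL], colMeans(data[, VL]), cov(data[, VL]))
crit <- qchisq(p = 0.001, df = length(VL), lower.tail = FALSE)
# under multivariate normality, D2 follows a chi-square distribution
# with df equal to the number of variables
pVal <- pchisq(D2, df = length(VL), lower.tail = FALSE)
head(data.frame(D2 = D2, p = pVal, outlier = D2 > crit))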
104 changes: 52 additions & 52 deletions Uni- and bivariate statistics/Syntax_z-test.R
@@ -1,52 +1,52 @@
# The data are contained in the file zeppo in the lsj-data library.
# To open the file: ☰ → Open → Data Library → learning statistics with
# jamovi → Zeppo

# The grades of Psychology students in Dr. Zeppo's class can be found
# in the variable x.
# calculate the mean of these scores (and show it in the output window)
M = mean(data$x)
M

# the standard deviation is taken from Dr. Zeppo's whole class
# (cf. LSJ, Ch. 11.1.1); it is quite reassuring that the standard
# deviation for the whole class and the one in our psychology subgroup
# are (more or less) identical: 9.5 vs. 9.521
9.5
SD = sd(data$x)
SD

# now, we calculate the standard error, which is the standard deviation
# divided by the square root of the sample size; the sample has 20
# students; length(data$x) tells us how many elements are contained in
# the variable x
SEM = 9.5 / sqrt(length(data$x))
SEM

# let's play around a little with that value and check what happens to
# the standard error of the mean when the sample size is varied;
# we compare how the standard error changes from 5 students, ...
9.5 / sqrt(5)
# over the original 20, ...
9.5 / sqrt(20)
# to 80 students
9.5 / sqrt(80)
# What you can see is that the larger the sample gets, the smaller the
# standard error of the mean becomes: this is logical because the more
# measurements you collect, the more exact your mean will be (and the
# smaller the error that you make when estimating it)
#
# we continue by subtracting the mean of Dr. Zeppo's whole class (67.5)
# from the mean in our psychology subgroup, and divide the result by the
# standard error of the mean to get a z-score
z = (M - 67.5) / SEM
z
# we can then compare this z-value with a standard normal distribution
# at an alpha (error probability) of 0.05 - which means we have 0.025 at
# the bottom and at the top of the distribution (1 - 0.025 → 0.975)
qnorm(0.975)
# we get a critical z-value of about 1.96, which is smaller than our
# z-value (2.25); that is, our z-value (2.25) is more extreme than the
# cut-off (1.96), and we can therefore reject H0 and assume that the
# average scores in the subgroup of psychology students are above the
# average scores in Dr. Zeppo's whole class
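The steps above can also be collected into a small helper function; this is a hedged sketch under the same assumptions (known population mean 67.5 and standard deviation 9.5), not part of this commit:

# sketch (not from the original script): one-sample z-test with a
# known population standard deviation
zTest <- function(x, mu, sigma) {
    z <- (mean(x) - mu) / (sigma / sqrt(length(x)))
    # two-sided p-value from the standard normal distribution
    p <- 2 * pnorm(abs(z), lower.tail = FALSE)
    c(z = z, p = p)
}
zTest(data$x, mu = 67.5, sigma = 9.5)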
