Title: | Basic Statistics |
---|---|
Description: | Basic statistical analyses. The package has been developed to be used in statistics courses at Bocconi University (Milan, Italy). Currently, the package includes some exploratory and inferential analyses usually presented in introductory statistics courses. |
Authors: | Raffaella Piccarreta [aut], Sergio Venturini [cre] |
Maintainer: | Sergio Venturini <[email protected]> |
License: | GPL-3 |
Version: | 0.2.2 |
Built: | 2024-11-19 05:55:45 UTC |
Source: | https://github.com/raffaellapiccarreta/ubstats |
CI.diffmean()
builds confidence intervals for the difference
between the means of two independent or paired populations.
CI.diffmean( x, y, type = "independent", sigma.x = NULL, sigma.y = NULL, conf.level = 0.95, by, sigma.by = NULL, sigma.d = NULL, var.test = FALSE, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
CI.diffmean( x, y, type = "independent", sigma.x = NULL, sigma.y = NULL, conf.level = 0.95, by, sigma.by = NULL, sigma.d = NULL, var.test = FALSE, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x , y
|
Unquoted strings identifying two numeric
variables with the same length whose means have to be compared.
|
type |
A length-one character vector specifying the type of samples.
Allowed values are |
sigma.x , sigma.y
|
Optional numeric values specifying
the possibly known populations' standard deviations
(when |
conf.level |
Numeric value specifying the required confidence level; defaults to 0.95. |
by |
Optional unquoted string, available only when
|
sigma.by |
Optional numeric value specifying the possibly known
standard deviations for the two independent samples identified via
|
sigma.d |
Optional numeric value specifying the possibly known standard deviation of the difference when samples are paired. |
var.test |
Logical value indicating whether to run a test on the equality of variance for two (independent) samples or not (default). |
digits |
Integer value specifying the number of
decimals used to round statistics; defaults to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcibly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the confidence intervals for the difference between the populations' means. For independent samples in the case of unknown variances, the intervals are built both under the assumption that the variances are equal and under the assumption that they differ, using percentiles from both the normal and the Student's t distribution. If var.test = TRUE, the output is a list that also includes the results of the test on the equality of the variances.
Raffaella Piccarreta [email protected]
TEST.diffmean()
to test hypotheses on the
difference between two populations' means.
data(MktDATA, package = "UBStats") # Independent samples (default type), UNKNOWN variances # CI for the difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] CI.diffmean(x = AOV_M, y = AOV_F) # - Change confidence level CI.diffmean(x = AOV_M, y = AOV_F, conf.level = 0.99) # - Using x,by: groups identified by ordered levels of by CI.diffmean(x = AOV, by = Gender, conf.level = 0.99, data = MktDATA) # Since order is F, M, CI is for mean(F) - mean(M) # To get the interval for mean(M) - mean(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffmean(x = AOV, by = Gender.R, conf.level = 0.99, data = MktDATA) # - Testing hypotheses on equality of unknown variances CI.diffmean(x = AOV_M, y = AOV_F, conf.level = 0.99, var.test = TRUE) # - Output results: only information on the CI out.ci_diffM<-CI.diffmean(x = AOV_M, y = AOV_F) # - Output results: list with information on CI and test on var out.ci_diffM.V<-CI.diffmean(x = AOV_M, y = AOV_F, var.test = TRUE) # Independent samples (default type), KNOWN variances # CI for the difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] CI.diffmean(x = AOV_M, y = AOV_F, sigma.x = 10, sigma.y = 20) # - Using x,by: groups identified by ordered levels of by CI.diffmean(x = AOV, by = Gender, sigma.by = c("M" = 10, "F"=20), data = MktDATA) # To change the sign, order levels as desired Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffmean(x = AOV, by = Gender.R, sigma.by = c("M" = 10, "F"=20), data = MktDATA) # - Output results out.ci_diffM<-CI.diffmean(x = AOV_M, y = AOV_F, sigma.x = 10, sigma.y = 20) # Paired samples: UNKNOWN variances # - Default settings CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", data=MktDATA) # - Change 
confidence level CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", conf.level = 0.9, data = MktDATA) # Paired: KNOWN variances CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", conf.level = 0.9, sigma.d = 2, data = MktDATA) # - Output results out.ci_diffM<-CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", conf.level = 0.9, sigma.d = 2, data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/5000 SmallX_M <- SmallX[MktDATA$Gender == "M"] SmallX_F <- SmallX[MktDATA$Gender == "F"] # - Default: manages possible excess of rounding CI.diffmean(x = SmallX_M, y = SmallX_F) # - Force to the requested nr of digits (default, 2) CI.diffmean(x = SmallX_M, y = SmallX_F, force.digits = TRUE) # - Allow scientific notation CI.diffmean(x = SmallX_M, y = SmallX_F, use.scientific = TRUE)
data(MktDATA, package = "UBStats") # Independent samples (default type), UNKNOWN variances # CI for the difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] CI.diffmean(x = AOV_M, y = AOV_F) # - Change confidence level CI.diffmean(x = AOV_M, y = AOV_F, conf.level = 0.99) # - Using x,by: groups identified by ordered levels of by CI.diffmean(x = AOV, by = Gender, conf.level = 0.99, data = MktDATA) # Since order is F, M, CI is for mean(F) - mean(M) # To get the interval for mean(M) - mean(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffmean(x = AOV, by = Gender.R, conf.level = 0.99, data = MktDATA) # - Testing hypotheses on equality of unknown variances CI.diffmean(x = AOV_M, y = AOV_F, conf.level = 0.99, var.test = TRUE) # - Output results: only information on the CI out.ci_diffM<-CI.diffmean(x = AOV_M, y = AOV_F) # - Output results: list with information on CI and test on var out.ci_diffM.V<-CI.diffmean(x = AOV_M, y = AOV_F, var.test = TRUE) # Independent samples (default type), KNOWN variances # CI for the difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] CI.diffmean(x = AOV_M, y = AOV_F, sigma.x = 10, sigma.y = 20) # - Using x,by: groups identified by ordered levels of by CI.diffmean(x = AOV, by = Gender, sigma.by = c("M" = 10, "F"=20), data = MktDATA) # To change the sign, order levels as desired Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffmean(x = AOV, by = Gender.R, sigma.by = c("M" = 10, "F"=20), data = MktDATA) # - Output results out.ci_diffM<-CI.diffmean(x = AOV_M, y = AOV_F, sigma.x = 10, sigma.y = 20) # Paired samples: UNKNOWN variances # - Default settings CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", data=MktDATA) # - Change 
confidence level CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", conf.level = 0.9, data = MktDATA) # Paired: KNOWN variances CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", conf.level = 0.9, sigma.d = 2, data = MktDATA) # - Output results out.ci_diffM<-CI.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", conf.level = 0.9, sigma.d = 2, data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/5000 SmallX_M <- SmallX[MktDATA$Gender == "M"] SmallX_F <- SmallX[MktDATA$Gender == "F"] # - Default: manages possible excess of rounding CI.diffmean(x = SmallX_M, y = SmallX_F) # - Force to the requested nr of digits (default, 2) CI.diffmean(x = SmallX_M, y = SmallX_F, force.digits = TRUE) # - Allow scientific notation CI.diffmean(x = SmallX_M, y = SmallX_F, use.scientific = TRUE)
CI.diffprop()
builds confidence intervals for the difference
between the proportion of successes in two independent populations.
CI.diffprop( x, y, success.x = NULL, success.y = NULL, conf.level = 0.95, by, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
CI.diffprop( x, y, success.x = NULL, success.y = NULL, conf.level = 0.95, by, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x , y
|
Unquoted strings identifying the variables of
interest. |
success.x , success.y
|
If |
conf.level |
Numeric value specifying the required confidence level; defaults to 0.95. |
by |
Optional unquoted string identifying a variable
(of any type), defined same way as |
digits |
Integer value specifying the number of
decimals used to round statistics; defaults to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcibly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the confidence intervals for the difference between the proportions of successes in two independent populations.
Raffaella Piccarreta [email protected]
TEST.diffprop()
to test hypotheses on the difference
between the proportions of successes in two populations.
data(MktDATA, package = "UBStats") # Proportions of success defined on non-binary and # non-logical vectors; 'success' coded same way # for both vectors # - Using x,y: build vectors with data on the two groups WouldSuggest_F <- MktDATA$WouldSuggest[MktDATA$Gender == "F"] WouldSuggest_M <- MktDATA$WouldSuggest[MktDATA$Gender == "M"] CI.diffprop(x = WouldSuggest_M, y = WouldSuggest_F, success.x = "Yes") PastCampaigns_F<-MktDATA$PastCampaigns[MktDATA$Gender=="F"] PastCampaigns_M<-MktDATA$PastCampaigns[MktDATA$Gender=="M"] CI.diffprop(x = PastCampaigns_M, y = PastCampaigns_F, success.x = 0, conf.level = 0.99) # - Using x,by: groups identified by ordered levels of by CI.diffprop(x = PastCampaigns, by = Gender, success.x=0, conf.level = 0.99, data = MktDATA) # Since order is F, M, CI is for prop(F) - prop(M) # To get the interval for prop(M) - prop(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffprop(x = PastCampaigns, by = Gender.R, success.x=0, conf.level = 0.99, data = MktDATA) # Proportions of success defined based on # binary or logical vectors; 'success' # coded same way for both vectors # - Binary variable (success=1): based on x,y LastCampaign_F<-MktDATA$LastCampaign[MktDATA$Gender=="F"] LastCampaign_M<-MktDATA$LastCampaign[MktDATA$Gender=="M"] CI.diffprop(x = LastCampaign_M, y = LastCampaign_F) # - Binary variable (success=1): based on x,by # see above for recoding of levels of Gender Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffprop(x = LastCampaign, by = Gender.R, data = MktDATA) # - Logical variable (success=TRUE): based on x,y Deals_w_child <- MktDATA$Deals.ge50[MktDATA$Children>0] Deals_no_child <- MktDATA$Deals.ge50[MktDATA$Children==0] CI.diffprop(x = Deals_w_child, y = Deals_no_child, conf.level = 0.9) # Proportions defined on # non-binary and non-logical vectors, with 'success' # coded differently (only specification x,y is reasonable here) WouldSuggest_Other<-c(rep("OK",310),rep("KO",650-310)) CI.diffprop(x = 
WouldSuggest, y = WouldSuggest_Other, success.x = "Yes", success.y = "OK", data = MktDATA) # Proportions based on combined conditions # - Build logical vector/s indicating whether a condition # is satisfied IsTop<-MktDATA$AOV>80 IsTop_OK<-IsTop[MktDATA$WouldSuggest == "Yes"] IsTop_KO<-IsTop[MktDATA$WouldSuggest == "No"] CI.diffprop(x = IsTop_OK, y = IsTop_KO, conf.level = 0.9) Deals<-MktDATA$NDeals>=5 Deals_Married <- Deals[MktDATA$Marital_Status=="Married" & MktDATA$Children==0] Deals_Single <- Deals[MktDATA$Marital_Status=="Single"] CI.diffprop(x = Deals_Married, y = Deals_Single, conf.level = 0.9) # Output results Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) out.ci_diffP<-CI.diffprop(x = PastCampaigns, by = Gender.R, success.x=0, conf.level = 0.99, data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values HighAOV <- MktDATA$AOV>150 # - Default: manages possible excess of rounding CI.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"]) # - Force to the exact number of digits (default, 2) CI.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], force.digits = TRUE) # - Allow scientific notation CI.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], use.scientific = TRUE)
data(MktDATA, package = "UBStats") # Proportions of success defined on non-binary and # non-logical vectors; 'success' coded same way # for both vectors # - Using x,y: build vectors with data on the two groups WouldSuggest_F <- MktDATA$WouldSuggest[MktDATA$Gender == "F"] WouldSuggest_M <- MktDATA$WouldSuggest[MktDATA$Gender == "M"] CI.diffprop(x = WouldSuggest_M, y = WouldSuggest_F, success.x = "Yes") PastCampaigns_F<-MktDATA$PastCampaigns[MktDATA$Gender=="F"] PastCampaigns_M<-MktDATA$PastCampaigns[MktDATA$Gender=="M"] CI.diffprop(x = PastCampaigns_M, y = PastCampaigns_F, success.x = 0, conf.level = 0.99) # - Using x,by: groups identified by ordered levels of by CI.diffprop(x = PastCampaigns, by = Gender, success.x=0, conf.level = 0.99, data = MktDATA) # Since order is F, M, CI is for prop(F) - prop(M) # To get the interval for prop(M) - prop(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffprop(x = PastCampaigns, by = Gender.R, success.x=0, conf.level = 0.99, data = MktDATA) # Proportions of success defined based on # binary or logical vectors; 'success' # coded same way for both vectors # - Binary variable (success=1): based on x,y LastCampaign_F<-MktDATA$LastCampaign[MktDATA$Gender=="F"] LastCampaign_M<-MktDATA$LastCampaign[MktDATA$Gender=="M"] CI.diffprop(x = LastCampaign_M, y = LastCampaign_F) # - Binary variable (success=1): based on x,by # see above for recoding of levels of Gender Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) CI.diffprop(x = LastCampaign, by = Gender.R, data = MktDATA) # - Logical variable (success=TRUE): based on x,y Deals_w_child <- MktDATA$Deals.ge50[MktDATA$Children>0] Deals_no_child <- MktDATA$Deals.ge50[MktDATA$Children==0] CI.diffprop(x = Deals_w_child, y = Deals_no_child, conf.level = 0.9) # Proportions defined on # non-binary and non-logical vectors, with 'success' # coded differently (only specification x,y is reasonable here) WouldSuggest_Other<-c(rep("OK",310),rep("KO",650-310)) CI.diffprop(x = 
WouldSuggest, y = WouldSuggest_Other, success.x = "Yes", success.y = "OK", data = MktDATA) # Proportions based on combined conditions # - Build logical vector/s indicating whether a condition # is satisfied IsTop<-MktDATA$AOV>80 IsTop_OK<-IsTop[MktDATA$WouldSuggest == "Yes"] IsTop_KO<-IsTop[MktDATA$WouldSuggest == "No"] CI.diffprop(x = IsTop_OK, y = IsTop_KO, conf.level = 0.9) Deals<-MktDATA$NDeals>=5 Deals_Married <- Deals[MktDATA$Marital_Status=="Married" & MktDATA$Children==0] Deals_Single <- Deals[MktDATA$Marital_Status=="Single"] CI.diffprop(x = Deals_Married, y = Deals_Single, conf.level = 0.9) # Output results Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) out.ci_diffP<-CI.diffprop(x = PastCampaigns, by = Gender.R, success.x=0, conf.level = 0.99, data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values HighAOV <- MktDATA$AOV>150 # - Default: manages possible excess of rounding CI.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"]) # - Force to the exact number of digits (default, 2) CI.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], force.digits = TRUE) # - Allow scientific notation CI.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], use.scientific = TRUE)
CI.mean()
builds confidence intervals for the mean of a population.
CI.mean( x, sigma = NULL, conf.level = 0.95, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
CI.mean( x, sigma = NULL, conf.level = 0.95, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the numeric
variable whose mean is of interest. |
sigma |
An optional numeric value specifying the
population standard deviation. If |
conf.level |
Numeric value specifying the required confidence level; defaults to 0.95. |
digits |
Integer value specifying the number of
decimals used to round statistics; defaults to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcibly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the confidence interval for the population mean. If the variance is unknown, the interval is built using percentiles from both the normal and the Student's t distribution.
Raffaella Piccarreta [email protected]
TEST.mean()
to test hypotheses on a population
mean.
data(MktDATA, package = "UBStats") # CI for the mean with KNOWN variance; default options CI.mean(AOV, sigma = 30, data = MktDATA) # CI for the mean with UNKNOWN variance; # - change digits and confidence level 0.99 CI.mean(AOV, conf.level = 0.99, digits = 3, data = MktDATA) # Arguments force.digits and use.scientific # A variable taking very small values SmallX<-MktDATA$AOV/5000 # - Default: manages possible excess of rounding CI.mean(SmallX) # - Forcing digits to the default values (2) CI.mean(SmallX, force.digits = TRUE) # - Allow scientific notation CI.mean(SmallX, use.scientific = TRUE) # Output the table with the requested interval out.ci_mean<-CI.mean(AOV, data = MktDATA)
data(MktDATA, package = "UBStats") # CI for the mean with KNOWN variance; default options CI.mean(AOV, sigma = 30, data = MktDATA) # CI for the mean with UNKNOWN variance; # - change digits and confidence level 0.99 CI.mean(AOV, conf.level = 0.99, digits = 3, data = MktDATA) # Arguments force.digits and use.scientific # A variable taking very small values SmallX<-MktDATA$AOV/5000 # - Default: manages possible excess of rounding CI.mean(SmallX) # - Forcing digits to the default values (2) CI.mean(SmallX, force.digits = TRUE) # - Allow scientific notation CI.mean(SmallX, use.scientific = TRUE) # Output the table with the requested interval out.ci_mean<-CI.mean(AOV, data = MktDATA)
CI.prop()
builds confidence intervals for the proportion of
successes in a population.
CI.prop( x, success = NULL, conf.level = 0.95, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
CI.prop( x, success = NULL, conf.level = 0.95, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the variable of interest.
|
success |
If |
conf.level |
Numeric value specifying the required confidence level; defaults to 0.95. |
digits |
Integer value specifying the number of
decimals used to round statistics; defaults to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcibly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the confidence intervals for the population proportion of successes.
Raffaella Piccarreta [email protected]
TEST.prop()
to test hypotheses on the
proportion of successes in a population.
data(MktDATA, package = "UBStats") # Success = one value of a character vector or factor CI.prop(WouldSuggest, success = "Yes", data = MktDATA) # - change confidence level and rounding CI.prop(Education, success = "Post-Grad", conf.level = 0.9, digits = 4, data = MktDATA) # Success = numeric value CI.prop(Children, success = 2, data = MktDATA) # Binary variable ('success' is 1 by default) CI.prop(LastCampaign, digits = 3, data = MktDATA) # Logical variable ('success' is TRUE by default) CI.prop(RespCampaign, conf.level = 0.9, digits = 3, data = MktDATA) # Success based on combined conditions # - Build a (logical) vector indicating whether a condition is satisfied IsTop <- MktDATA$CustClass == "Gold" | MktDATA$CustClass == "Platinum" CI.prop(IsTop, conf.level = 0.9) # - A very rare event HighAOV <- MktDATA$AOV>150 CI.prop(HighAOV, conf.level = 0.9) # Arguments force.digits, use.scientific # - Default: manages possible excess of rounding CI.prop(HighAOV) # - Forcing digits to the default values (2) CI.prop(HighAOV, force.digits = TRUE) # - Allow scientific notation CI.prop(HighAOV, use.scientific = TRUE) # Output results out_ci_prop<-CI.prop(HighAOV)
data(MktDATA, package = "UBStats") # Success = one value of a character vector or factor CI.prop(WouldSuggest, success = "Yes", data = MktDATA) # - change confidence level and rounding CI.prop(Education, success = "Post-Grad", conf.level = 0.9, digits = 4, data = MktDATA) # Success = numeric value CI.prop(Children, success = 2, data = MktDATA) # Binary variable ('success' is 1 by default) CI.prop(LastCampaign, digits = 3, data = MktDATA) # Logical variable ('success' is TRUE by default) CI.prop(RespCampaign, conf.level = 0.9, digits = 3, data = MktDATA) # Success based on combined conditions # - Build a (logical) vector indicating whether a condition is satisfied IsTop <- MktDATA$CustClass == "Gold" | MktDATA$CustClass == "Platinum" CI.prop(IsTop, conf.level = 0.9) # - A very rare event HighAOV <- MktDATA$AOV>150 CI.prop(HighAOV, conf.level = 0.9) # Arguments force.digits, use.scientific # - Default: manages possible excess of rounding CI.prop(HighAOV) # - Forcing digits to the default values (2) CI.prop(HighAOV, force.digits = TRUE) # - Allow scientific notation CI.prop(HighAOV, use.scientific = TRUE) # Output results out_ci_prop<-CI.prop(HighAOV)
distr.plot.x()
generates plots of a univariate distribution.
distr.plot.x( x, freq = "counts", plot.type, ord.freq = "none", breaks, adj.breaks = TRUE, interval = FALSE, bw = FALSE, color = NULL, use.scientific = FALSE, data, ... )
distr.plot.x( x, freq = "counts", plot.type, ord.freq = "none", breaks, adj.breaks = TRUE, interval = FALSE, bw = FALSE, color = NULL, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the variable whose
distribution has to be analysed. |
freq |
A single character specifying the frequencies to be
displayed. Allowed options (possibly abbreviated) are |
plot.type |
A single character specifying the type of plot to build.
Allowed options are |
ord.freq |
A single character vector that can be specified when
|
breaks |
Allows classifying a numerical variable |
adj.breaks |
Logical value indicating whether the endpoints of
intervals of a numerical variable |
interval |
Logical value indicating whether |
bw |
Logical value indicating whether plots should be colored
in scale of greys ( |
color |
Optional string vector specifying the colors
to use in the plot rather than a standard palette
( |
use.scientific |
Logical value indicating whether numbers on
axes should be displayed using scientific notation
( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
No return value, called for side effects.
Raffaella Piccarreta [email protected]
distr.table.x()
for tabulating a univariate
distribution.
distr.table.xy()
for tabulating a bivariate
distribution.
distr.plot.xy()
for plotting a bivariate
distribution.
data(MktDATA, package = "UBStats") # Pie charts # - A character variable: grey scale distr.plot.x(x = LikeMost, plot.type = "pie", bw = TRUE, data = MktDATA) # - A discrete numeric variable: user-defined palette distr.plot.x(x = Children, plot.type = "pie", color=c("red","gold","green","forestgreen"), data = MktDATA) # Bar charts # - A factor: standard order of levels distr.plot.x(x = Education, plot.type = "bars", freq = "percentage", data = MktDATA) # - A factor: levels arranged by decreasing percentage distr.plot.x(x = Education, plot.type = "bars", freq = "perc", ord.freq = "dec", data = MktDATA) # - A discrete variable (note: distance between values # not taken into account) distr.plot.x(x = NPickUp_Purch, plot.type = "bars", freq = "percentage", data = MktDATA) # Spike plots # - A discrete variable distr.plot.x(x = NPickUp_Purch, plot.type = "spike", freq = "percent", data = MktDATA) # - A factor (levels placed at the same distance) distr.plot.x(x = Education, plot.type = "spike", freq = "prop",data = MktDATA) # - A variable measured in classes (levels placed at the # same distance) distr.plot.x(x = Income.S, interval = TRUE, plot.type = "spike", freq = "prop",data = MktDATA) # - A numeric variable classified into intervals # (levels placed at the same distance) distr.plot.x(x = AOV, breaks = 5, plot.type = "spike", data = MktDATA) # Cumulative distribution plots # - A discrete variable distr.plot.x(x = Children, plot.type = "cum", data = MktDATA) # - A continuous numerical variable distr.plot.x(x = AOV, plot.type = "cum", freq = "perc", data = MktDATA) # - A numeric variable classified into intervals distr.plot.x(AOV, plot.type = "cum", breaks = c(0,20,40,60,80,100,180), data = MktDATA) # - A variable measured in classes distr.plot.x(Income, plot.type = "cum", interval = TRUE, freq = "percent", data = MktDATA) # - A factor distr.plot.x(x = Education, plot.type = "cum", freq = "prop",data = MktDATA) # Histograms # - A continuous numerical variable: no breaks 
provided # default classes built by R distr.plot.x(x = AOV, plot.type = "histogram", data = MktDATA) # - A continuous numerical variable: equal width intervals distr.plot.x(x = AOV, plot.type = "histogram", breaks = 10, data = MktDATA) # - A continuous numerical variable: specified breaks distr.plot.x(AOV, plot.type = "histogram", breaks = c(0,20,40,60,80,100,180), data = MktDATA) # - A variable measured in classes distr.plot.x(Income, plot.type = "histogram", interval = TRUE, data = MktDATA) # Density plots # - A numerical variable distr.plot.x(x = AOV, plot.type = "density", data = MktDATA) # - A numerical variable: breaks are ignored distr.plot.x(AOV, plot.type = "density", breaks = c(0,20,40,60,80,100,180), data = MktDATA) # - A variable measured in classes distr.plot.x(Income, plot.type = "density", interval = TRUE, data = MktDATA) # Boxplots (only for numerical unclassified variables) # - A numerical variable distr.plot.x(x = TotVal, plot.type = "boxplot", data = MktDATA) # - A numerical variable: with specified breaks # the plot is not built # distr.plot.x(AOV, plot.type = "boxplot", # breaks = c(0,20,40,60,80,100,180), # data = MktDATA) # Arguments adj.breaks, use.scientific # A variable with a very wide range (very small densities) LargeX<-MktDATA$AOV*5000000 # - Default formatting for intervals' endpoints distr.plot.x(LargeX, breaks = 5, plot.type = "spike") # - Scientific notation for intervals' endpoints distr.plot.x(LargeX, breaks = 5,plot.type = "spike", adj.breaks = FALSE) # - Default formatting for axes distr.plot.x(LargeX, breaks = 5,plot.type = "histogram", freq = "densities") # - Scientific notation for axes distr.plot.x(LargeX, breaks = 5,plot.type = "histogram", freq = "densities",use.scientific = TRUE)
data(MktDATA, package = "UBStats") # Pie charts # - A character variable: grey scale distr.plot.x(x = LikeMost, plot.type = "pie", bw = TRUE, data = MktDATA) # - A discrete numeric variable: user-defined palette distr.plot.x(x = Children, plot.type = "pie", color=c("red","gold","green","forestgreen"), data = MktDATA) # Bar charts # - A factor: standard order of levels distr.plot.x(x = Education, plot.type = "bars", freq = "percentage", data = MktDATA) # - A factor: levels arranged by decreasing percentage distr.plot.x(x = Education, plot.type = "bars", freq = "perc", ord.freq = "dec", data = MktDATA) # - A discrete variable (note: distance between values # not taken into account) distr.plot.x(x = NPickUp_Purch, plot.type = "bars", freq = "percentage", data = MktDATA) # Spike plots # - A discrete variable distr.plot.x(x = NPickUp_Purch, plot.type = "spike", freq = "percent", data = MktDATA) # - A factor (levels placed at the same distance) distr.plot.x(x = Education, plot.type = "spike", freq = "prop",data = MktDATA) # - A variable measured in classes (levels placed at the # same distance) distr.plot.x(x = Income.S, interval = TRUE, plot.type = "spike", freq = "prop",data = MktDATA) # - A numeric variable classified into intervals # (levels placed at the same distance) distr.plot.x(x = AOV, breaks = 5, plot.type = "spike", data = MktDATA) # Cumulative distribution plots # - A discrete variable distr.plot.x(x = Children, plot.type = "cum", data = MktDATA) # - A continuous numerical variable distr.plot.x(x = AOV, plot.type = "cum", freq = "perc", data = MktDATA) # - A numeric variable classified into intervals distr.plot.x(AOV, plot.type = "cum", breaks = c(0,20,40,60,80,100,180), data = MktDATA) # - A variable measured in classes distr.plot.x(Income, plot.type = "cum", interval = TRUE, freq = "percent", data = MktDATA) # - A factor distr.plot.x(x = Education, plot.type = "cum", freq = "prop",data = MktDATA) # Histograms # - A continuous numerical variable: no breaks 
provided # default classes built by R distr.plot.x(x = AOV, plot.type = "histogram", data = MktDATA) # - A continuous numerical variable: equal width intervals distr.plot.x(x = AOV, plot.type = "histogram", breaks = 10, data = MktDATA) # - A continuous numerical variable: specified breaks distr.plot.x(AOV, plot.type = "histogram", breaks = c(0,20,40,60,80,100,180), data = MktDATA) # - A variable measured in classes distr.plot.x(Income, plot.type = "histogram", interval = TRUE, data = MktDATA) # Density plots # - A numerical variable distr.plot.x(x = AOV, plot.type = "density", data = MktDATA) # - A numerical variable: breaks are ignored distr.plot.x(AOV, plot.type = "density", breaks = c(0,20,40,60,80,100,180), data = MktDATA) # - A variable measured in classes distr.plot.x(Income, plot.type = "density", interval = TRUE, data = MktDATA) # Boxplots (only for numerical unclassified variables) # - A numerical variable distr.plot.x(x = TotVal, plot.type = "boxplot", data = MktDATA) # - A numerical variable: with specified breaks # the plot is not built # distr.plot.x(AOV, plot.type = "boxplot", # breaks = c(0,20,40,60,80,100,180), # data = MktDATA) # Arguments adj.breaks, use.scientific # A variable with a very wide range (very small densities) LargeX<-MktDATA$AOV*5000000 # - Default formatting for intervals' endpoints distr.plot.x(LargeX, breaks = 5, plot.type = "spike") # - Scientific notation for intervals' endpoints distr.plot.x(LargeX, breaks = 5,plot.type = "spike", adj.breaks = FALSE) # - Default formatting for axes distr.plot.x(LargeX, breaks = 5,plot.type = "histogram", freq = "densities") # - Scientific notation for axes distr.plot.x(LargeX, breaks = 5,plot.type = "histogram", freq = "densities",use.scientific = TRUE)
distr.plot.xy()
generates plots of a bivariate distribution.
distr.plot.xy( x, y, plot.type, bar.type = "stacked", freq = "counts", freq.type = "joint", breaks.x, breaks.y, interval.x = FALSE, interval.y = FALSE, bw = FALSE, color = NULL, var.c, breaks.c, interval.c = FALSE, adj.breaks = TRUE, fitline = FALSE, legend = TRUE, use.scientific = FALSE, data, ... )
distr.plot.xy( x, y, plot.type, bar.type = "stacked", freq = "counts", freq.type = "joint", breaks.x, breaks.y, interval.x = FALSE, interval.y = FALSE, bw = FALSE, color = NULL, var.c, breaks.c, interval.c = FALSE, adj.breaks = TRUE, fitline = FALSE, legend = TRUE, use.scientific = FALSE, data, ... )
x , y
|
Unquoted strings identifying the variables whose
distribution has to be graphically displayed. |
plot.type |
A single character specifying the type of plot to build.
Allowed options are |
bar.type |
A single character indicating whether in a bar plot
stacked ( |
freq |
A single character specifying the frequencies
to be displayed when a bar plot is requested ( |
freq.type |
A single character specifying the type of
frequencies to be displayed when a bar plot is requested
( |
breaks.x , breaks.y
|
Allow classifying the variables |
interval.x , interval.y
|
Logical values indicating whether
|
bw |
Logical value indicating whether plots should be colored
in scale of greys ( |
color |
Optional string vector allowing to specify colors
to use in the plot rather than a standard palette
( |
var.c |
An optional unquoted string identifying one variable
used to color points in a scatter plot ( |
breaks.c |
Allows classifying the variable |
interval.c |
Logical value indicating whether |
adj.breaks |
Logical value indicating whether the endpoints of
intervals of a numerical variable ( |
fitline |
Logical value indicating whether the line of best fit (also
called trend line or regression line) should be added to a scatter plot
( |
legend |
Logical value indicating whether a legend should be displayed
in the plot ( |
use.scientific |
Logical value indicating whether numbers on
axes should be displayed using scientific notation
( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
No return value, called for side effects.
Raffaella Piccarreta [email protected]
distr.table.xy()
for tabulating a bivariate
distribution.
distr.table.x()
for tabulating a univariate
distribution.
distr.plot.x()
for plotting a univariate
distribution.
data(MktDATA, package = "UBStats") # Bivariate bar plots # - Two discrete variables (factor or vector with few levels) # Joint counts distr.plot.xy(CustClass, Children,plot.type = "bars", freq = "Counts", freq.type = "joint", data = MktDATA) # - Two discrete variables (factor or vector with few levels) # Joint percentages, side-by-side bars # User-defined colors distr.plot.xy(Children,CustClass, plot.type = "bars", bar.type = "beside", freq = "percent", freq.type = "joint", color = c("red","gold","green","forestgreen"), data = MktDATA) # - One numeric variable classified into intervals # and one variable measured in classes # Conditional percentages of x|y distr.plot.xy(TotPurch, Income, plot.type = "bars", freq = "percent",freq.type = "x|y", breaks.x = c(0,5,10,15,20,35), interval.y = TRUE, data = MktDATA) # Conditional percentages of y|x distr.plot.xy(TotPurch, Income, plot.type = "bars", freq = "percent",freq.type = "y|x", breaks.x = c(0,5,10,15,20,35), interval.y = TRUE, data = MktDATA) # Side-by-side boxplots # - A continuous variable conditioned to a factor, # a character, or a classified variable # The distributions of the numeric variable conditioned # to the factor (or character) are displayed distr.plot.xy(x = AOV, y = Education, plot.type = "boxplot", data = MktDATA) distr.plot.xy(x = Income.S, y = AOV, plot.type = "boxplot", interval.x = TRUE, data = MktDATA) distr.plot.xy(x = Baseline, y = TotPurch, plot.type = "boxplot", breaks.y = c(0,5,10,15,20,35), data = MktDATA) # - Two numerical variables. 
By default distributions # of y|x are displayed unless differently # specified in freq.type distr.plot.xy(x = NPickUp_Purch, y = NWeb_Purch, plot.type = "boxplot", data = MktDATA) distr.plot.xy(x = NPickUp_Purch, y = NWeb_Purch, plot.type = "boxplot",freq.type = "x|y", data = MktDATA) # Scatter plots # - Two numerical variables: default options distr.plot.xy(Baseline, TotVal, plot.type = "scatter", fitline = TRUE, data = MktDATA) # - Two numerical variables: colors based on discrete var distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = Marital_Status, fitline = TRUE, data = MktDATA) distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = Income, interval.c = TRUE, fitline = TRUE, data = MktDATA) distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = TotPurch, breaks.c = 10, fitline = TRUE, data = MktDATA) # - Two numerical variables: colors based # on a continuous numerical variable distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = AOV, fitline = TRUE, data = MktDATA) # - One numerical variable and one factor or character distr.plot.xy(Baseline, Marital_Status, plot.type = "scatter", fitline = TRUE, data = MktDATA) distr.plot.xy(Income.S, Baseline, plot.type = "scatter", interval.x = TRUE, fitline = TRUE, data = MktDATA) # color based on a third variable distr.plot.xy(TotPurch, TotVal, plot.type = "scatter", breaks.x = c(0,5,10,15,20,35), var.c = AOV, fitline = TRUE, data = MktDATA) # - Two factors or character vectors: bubble plots distr.plot.xy(Education, LikeMost, plot.type = "scatter", data = MktDATA) # - Two classified variables (i.e. 
not properly numerical): # bubble plots, changed color distr.plot.xy(Income.S, TotPurch, plot.type = "scatter", interval.x = TRUE, breaks.y = c(0,5,10,15,20,35), color = "orchid", data = MktDATA) # Arguments adj.breaks and use.scientific # Variable with very wide ranges LargeC<-MktDATA$AOV*5000000 LargeX<-MktDATA$Baseline*1000000 LargeY<-MktDATA$TotVal*1000000 # - Default: no scientific notation distr.plot.xy(LargeX, LargeY, plot.type = "scatter", var.c = LargeC, data = MktDATA) distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, data = MktDATA) # - Scientific notation for axes distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, use.scientific = TRUE, data = MktDATA) # - Scientific notation for intervals' endpoints distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, adj.breaks = FALSE, data = MktDATA) # - Scientific notation for intervals endpoints and axes distr.plot.xy(LargeX, LargeY, plot.type = "scatter", var.c = LargeC, fitline = TRUE, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA) distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA)
data(MktDATA, package = "UBStats") # Bivariate bar plots # - Two discrete variables (factor or vector with few levels) # Joint counts distr.plot.xy(CustClass, Children,plot.type = "bars", freq = "Counts", freq.type = "joint", data = MktDATA) # - Two discrete variables (factor or vector with few levels) # Joint percentages, side-by-side bars # User-defined colors distr.plot.xy(Children,CustClass, plot.type = "bars", bar.type = "beside", freq = "percent", freq.type = "joint", color = c("red","gold","green","forestgreen"), data = MktDATA) # - One numeric variable classified into intervals # and one variable measured in classes # Conditional percentages of x|y distr.plot.xy(TotPurch, Income, plot.type = "bars", freq = "percent",freq.type = "x|y", breaks.x = c(0,5,10,15,20,35), interval.y = TRUE, data = MktDATA) # Conditional percentages of y|x distr.plot.xy(TotPurch, Income, plot.type = "bars", freq = "percent",freq.type = "y|x", breaks.x = c(0,5,10,15,20,35), interval.y = TRUE, data = MktDATA) # Side-by-side boxplots # - A continuous variable conditioned to a factor, # a character, or a classified variable # The distributions of the numeric variable conditioned # to the factor (or character) are displayed distr.plot.xy(x = AOV, y = Education, plot.type = "boxplot", data = MktDATA) distr.plot.xy(x = Income.S, y = AOV, plot.type = "boxplot", interval.x = TRUE, data = MktDATA) distr.plot.xy(x = Baseline, y = TotPurch, plot.type = "boxplot", breaks.y = c(0,5,10,15,20,35), data = MktDATA) # - Two numerical variables. 
By default distributions # of y|x are displayed unless differently # specified in freq.type distr.plot.xy(x = NPickUp_Purch, y = NWeb_Purch, plot.type = "boxplot", data = MktDATA) distr.plot.xy(x = NPickUp_Purch, y = NWeb_Purch, plot.type = "boxplot",freq.type = "x|y", data = MktDATA) # Scatter plots # - Two numerical variables: default options distr.plot.xy(Baseline, TotVal, plot.type = "scatter", fitline = TRUE, data = MktDATA) # - Two numerical variables: colors based on discrete var distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = Marital_Status, fitline = TRUE, data = MktDATA) distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = Income, interval.c = TRUE, fitline = TRUE, data = MktDATA) distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = TotPurch, breaks.c = 10, fitline = TRUE, data = MktDATA) # - Two numerical variables: colors based # on a continuous numerical variable distr.plot.xy(Baseline, TotVal, plot.type = "scatter", var.c = AOV, fitline = TRUE, data = MktDATA) # - One numerical variable and one factor or character distr.plot.xy(Baseline, Marital_Status, plot.type = "scatter", fitline = TRUE, data = MktDATA) distr.plot.xy(Income.S, Baseline, plot.type = "scatter", interval.x = TRUE, fitline = TRUE, data = MktDATA) # color based on a third variable distr.plot.xy(TotPurch, TotVal, plot.type = "scatter", breaks.x = c(0,5,10,15,20,35), var.c = AOV, fitline = TRUE, data = MktDATA) # - Two factors or character vectors: bubble plots distr.plot.xy(Education, LikeMost, plot.type = "scatter", data = MktDATA) # - Two classified variables (i.e. 
not properly numerical): # bubble plots, changed color distr.plot.xy(Income.S, TotPurch, plot.type = "scatter", interval.x = TRUE, breaks.y = c(0,5,10,15,20,35), color = "orchid", data = MktDATA) # Arguments adj.breaks and use.scientific # Variable with very wide ranges LargeC<-MktDATA$AOV*5000000 LargeX<-MktDATA$Baseline*1000000 LargeY<-MktDATA$TotVal*1000000 # - Default: no scientific notation distr.plot.xy(LargeX, LargeY, plot.type = "scatter", var.c = LargeC, data = MktDATA) distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, data = MktDATA) # - Scientific notation for axes distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, use.scientific = TRUE, data = MktDATA) # - Scientific notation for intervals' endpoints distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, adj.breaks = FALSE, data = MktDATA) # - Scientific notation for intervals endpoints and axes distr.plot.xy(LargeX, LargeY, plot.type = "scatter", var.c = LargeC, fitline = TRUE, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA) distr.plot.xy(LargeX, LargeY, plot.type = "scatter", breaks.x = 10, var.c = LargeC, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA)
distr.summary.x()
computes summary statistics of a vector or a factor.
distr.summary.x( x, stats = c("summary"), by1, by2, breaks.by1, interval.by1 = FALSE, breaks.by2, interval.by2 = FALSE, adj.breaks = TRUE, digits = 2, f.digits = 4, force.digits = FALSE, use.scientific = FALSE, data, ... )
distr.summary.x( x, stats = c("summary"), by1, by2, breaks.by1, interval.by1 = FALSE, breaks.by2, interval.by2 = FALSE, adj.breaks = TRUE, digits = 2, f.digits = 4, force.digits = FALSE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the variable whose
distribution has to be summarized. |
stats |
A character vector specifying the summary statistics to compute (more summaries can be specified). Specific types of summaries can be requested with the following options:
It is also possible to request the following statistics:
|
by1 , by2
|
Unquoted strings identifying optional variables
(typically taking few values/levels) used to build conditional
summaries, which can be defined in the same way as |
breaks.by1 , breaks.by2
|
Allow classifying the variables |
interval.by1 , interval.by2
|
Logical values indicating
whether |
adj.breaks |
Logical value indicating whether the endpoints of
intervals of the numerical variables |
digits , f.digits
|
Integer values specifying the number of
decimals used to round respectively summary statistics
(default: |
force.digits |
Logical value indicating whether the
requested summaries should be forcedly rounded to the number of decimals
specified in |
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A list whose elements are tables
(converted to dataframes) with the requested summaries, possibly
conditioned to by1
and/or by2
. The values taken
by the conditioning variables are arranged in standard
order (logical, alphabetical or numerical order for vectors,
order of levels for factors, ordered intervals for classified
variables or for variables measured in classes).
Raffaella Piccarreta [email protected]
summaries.plot.x()
to graphically display
conditioned tendency summaries of a univariate distribution.
distr.table.x()
for tabulating a univariate
distribution.
distr.plot.x()
for plotting a univariate
distribution.
data(MktDATA, package = "UBStats") # Marginal summaries # - Numerical variable: Default summaries distr.summary.x(x = AOV, data = MktDATA) # - Numerical variable: More summaries distr.summary.x(x = AOV, stats = c("central","dispersion","fivenum"), data = MktDATA) distr.summary.x(x = AOV, stats = c("mode","mean","sd","cv","fivenum"), data = MktDATA) # - Character or factor (only proper statistics calculated) distr.summary.x(x = LikeMost, stats = c("mode","mean","sd","cv","fivenum"), data = MktDATA) distr.summary.x(x = Education, stats = c("mode","mean","sd","cv","fivenum"), data = MktDATA) # Measures conditioned to a single variable # - Numerical variable by a character vector distr.summary.x(x = TotVal, stats = c("p5","p10","p25","p50","p75","p90","p95"), by1 = Gender, digits = 1, data = MktDATA) # - Numerical variable by a numerical variable # classified into intervals distr.summary.x(x = TotVal, stats = c("central","dispersion"), by1 = AOV, breaks.by1 = 5, digits = 1, data = MktDATA) # - Numerical variable by a variable measured in classes distr.summary.x(x = TotVal, stats = c("central","dispersion"), by1 = Income.S, interval.by1 = TRUE, digits = 1, data = MktDATA) # Measures conditioned to two variables distr.summary.x(x = TotVal, stats = "fivenumbers", by1 = Gender, by2 = Kids, data = MktDATA) distr.summary.x(x = TotVal, stats = "fivenumbers", by1 = Income.S, by2 = Gender, interval.by1 = TRUE, data = MktDATA) distr.summary.x(x = TotVal, stats = "fivenumbers", by1 = Gender, by2 = AOV, breaks.by2 = 5, data = MktDATA) # Arguments adj.breaks and use.scientific # Variables with a very wide range LargeX<-MktDATA$TotVal*1000000 LargeBY<-MktDATA$AOV*5000000 # - Default: no scientific notation distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, data = MktDATA) # - Scientific notation for summaries distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, use.scientific = TRUE, data = MktDATA) # - Scientific notation for intervals endpoints distr.summary.x(LargeX, 
by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, data = MktDATA) # - Scientific notation for intervals endpoints and summaries distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA) # Output the list with the requested summaries Out_TotVal<-distr.summary.x(x = TotVal, by1 = Income.S, by2 = Gender, interval.by1 = TRUE, stats = c("central","fivenum","dispersion"), data = MktDATA)
data(MktDATA, package = "UBStats") # Marginal summaries # - Numerical variable: Default summaries distr.summary.x(x = AOV, data = MktDATA) # - Numerical variable: More summaries distr.summary.x(x = AOV, stats = c("central","dispersion","fivenum"), data = MktDATA) distr.summary.x(x = AOV, stats = c("mode","mean","sd","cv","fivenum"), data = MktDATA) # - Character or factor (only proper statistics calculated) distr.summary.x(x = LikeMost, stats = c("mode","mean","sd","cv","fivenum"), data = MktDATA) distr.summary.x(x = Education, stats = c("mode","mean","sd","cv","fivenum"), data = MktDATA) # Measures conditioned to a single variable # - Numerical variable by a character vector distr.summary.x(x = TotVal, stats = c("p5","p10","p25","p50","p75","p90","p95"), by1 = Gender, digits = 1, data = MktDATA) # - Numerical variable by a numerical variable # classified into intervals distr.summary.x(x = TotVal, stats = c("central","dispersion"), by1 = AOV, breaks.by1 = 5, digits = 1, data = MktDATA) # - Numerical variable by a variable measured in classes distr.summary.x(x = TotVal, stats = c("central","dispersion"), by1 = Income.S, interval.by1 = TRUE, digits = 1, data = MktDATA) # Measures conditioned to two variables distr.summary.x(x = TotVal, stats = "fivenumbers", by1 = Gender, by2 = Kids, data = MktDATA) distr.summary.x(x = TotVal, stats = "fivenumbers", by1 = Income.S, by2 = Gender, interval.by1 = TRUE, data = MktDATA) distr.summary.x(x = TotVal, stats = "fivenumbers", by1 = Gender, by2 = AOV, breaks.by2 = 5, data = MktDATA) # Arguments adj.breaks and use.scientific # Variables with a very wide range LargeX<-MktDATA$TotVal*1000000 LargeBY<-MktDATA$AOV*5000000 # - Default: no scientific notation distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, data = MktDATA) # - Scientific notation for summaries distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, use.scientific = TRUE, data = MktDATA) # - Scientific notation for intervals endpoints distr.summary.x(LargeX, 
by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, data = MktDATA) # - Scientific notation for intervals endpoints and summaries distr.summary.x(LargeX, by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA) # Output the list with the requested summaries Out_TotVal<-distr.summary.x(x = TotVal, by1 = Income.S, by2 = Gender, interval.by1 = TRUE, stats = c("central","fivenum","dispersion"), data = MktDATA)
distr.table.x()
computes the frequency table of a vector or a factor.
distr.table.x( x, freq = c("counts", "proportions"), total = TRUE, breaks, adj.breaks = TRUE, interval = FALSE, f.digits = 2, p.digits = 0, d.digits = 5, force.digits = FALSE, use.scientific = FALSE, data, ... )
distr.table.x( x, freq = c("counts", "proportions"), total = TRUE, breaks, adj.breaks = TRUE, interval = FALSE, f.digits = 2, p.digits = 0, d.digits = 5, force.digits = FALSE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the variable whose
distribution has to be analysed. |
freq |
A character vector specifying the set of frequencies to be
displayed (more options are allowed). Allowed options (possibly abbreviated)
are |
total |
Logical value indicating whether the sum of the requested
frequencies should be added to the table; default to |
breaks |
Allows classifying a numerical variable |
adj.breaks |
Logical value indicating whether the endpoints of
intervals of a numerical variable |
interval |
Logical value indicating whether |
f.digits , p.digits , d.digits
|
Integer values specifying the number of
decimals used to round respectively proportions (default: |
force.digits |
Logical value indicating whether frequencies and
densities should be forcedly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables (typically densities) should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table (converted to dataframe) listing the values taken by the variable, arranged in standard order (logical, alphabetical or numerical order for vectors, order of levels for factors, ordered intervals for classified variables or for variables measured in classes), and the requested set of frequencies.
Raffaella Piccarreta [email protected]
distr.plot.x()
for plotting a univariate
distribution.
distr.table.xy()
for tabulating a bivariate
distribution.
distr.plot.xy()
for plotting a bivariate
distribution.
data(MktDATA, package = "UBStats") # Character vectors, factors, and discrete numeric vectors distr.table.x(Education, data = MktDATA) distr.table.x(Children, freq = c("count","prop","cum"), data = MktDATA) # Numerical variable classified into intervals # - Classes of equal width distr.table.x(AOV, breaks = 6, freq = c("Count","Prop","Perc","Cum"), p.digits = 2, data = MktDATA) # - Classes with specified endpoints distr.table.x(AOV, breaks = c(0,20,30,50,100,180), freq = c("Count","Perc","Cum","Densities"), p.digits = 2, data = MktDATA) # Numerical variable measured in classes # - Variable measured in classes distr.table.x(Income, freq = c("count","prop","cum","dens"), interval = TRUE, data = MktDATA) # - An example of non-consistent intervals. # Densities are not calculated x.inconsistent <- c(rep("0;10",30),rep("10;20",25),rep("25;8",25), rep("15;31",15),rep("20;45",16),rep("30;40",18)) distr.table.x(x.inconsistent, freq = c("count","prop","cum","dens"), interval = TRUE) # Arguments adj.breaks, use.scientific, and force.digits # A variable with a very wide range (very small densities) LargeX <- MktDATA$AOV*5000000 # - Default: manages possible excess of rounding distr.table.x(LargeX, breaks = 5, freq = c("count","percent","densities")) # - Forcing digits to the default values distr.table.x(LargeX, breaks = 5, freq=c("count","percent","dens"), force.digits = TRUE) # - Scientific notation for frequencies/densities distr.table.x(LargeX, breaks = 5, freq = c("count","percent","dens"), use.scientific = TRUE) # - Scientific notation both for intervals’ endpoints # and for frequencies/densities distr.table.x(LargeX, breaks = 5, adj.breaks = FALSE, freq = c("count","percent","dens"), use.scientific = TRUE) # Output a dataframe with the table table.AOV<-distr.table.x(AOV, breaks = c(0,20,30,50,100,180), freq = c("Count","Perc","Cum","Dens"), data = MktDATA)
data(MktDATA, package = "UBStats") # Character vectors, factors, and discrete numeric vectors distr.table.x(Education, data = MktDATA) distr.table.x(Children, freq = c("count","prop","cum"), data = MktDATA) # Numerical variable classified into intervals # - Classes of equal width distr.table.x(AOV, breaks = 6, freq = c("Count","Prop","Perc","Cum"), p.digits = 2, data = MktDATA) # - Classes with specified endpoints distr.table.x(AOV, breaks = c(0,20,30,50,100,180), freq = c("Count","Perc","Cum","Densities"), p.digits = 2, data = MktDATA) # Numerical variable measured in classes # - Variable measured in classes distr.table.x(Income, freq = c("count","prop","cum","dens"), interval = TRUE, data = MktDATA) # - An example of non-consistent intervals. # Densities are not calculated x.inconsistent <- c(rep("0;10",30),rep("10;20",25),rep("25;8",25), rep("15;31",15),rep("20;45",16),rep("30;40",18)) distr.table.x(x.inconsistent, freq = c("count","prop","cum","dens"), interval = TRUE) # Arguments adj.breaks, use.scientific, and force.digits # A variable with a very wide range (very small densities) LargeX <- MktDATA$AOV*5000000 # - Default: manages possible excess of rounding distr.table.x(LargeX, breaks = 5, freq = c("count","percent","densities")) # - Forcing digits to the default values distr.table.x(LargeX, breaks = 5, freq=c("count","percent","dens"), force.digits = TRUE) # - Scientific notation for frequencies/densities distr.table.x(LargeX, breaks = 5, freq = c("count","percent","dens"), use.scientific = TRUE) # - Scientific notation both for intervals’ endpoints # and for frequencies/densities distr.table.x(LargeX, breaks = 5, adj.breaks = FALSE, freq = c("count","percent","dens"), use.scientific = TRUE) # Output a dataframe with the table table.AOV<-distr.table.x(AOV, breaks = c(0,20,30,50,100,180), freq = c("Count","Perc","Cum","Dens"), data = MktDATA)
distr.table.xy()
displays tables of joint or conditional
distributions.
distr.table.xy( x, y, freq = "counts", freq.type = "joint", total = TRUE, breaks.x, breaks.y, adj.breaks = TRUE, interval.x = FALSE, interval.y = FALSE, f.digits = 2, p.digits = 0, force.digits = FALSE, data, ... )
distr.table.xy( x, y, freq = "counts", freq.type = "joint", total = TRUE, breaks.x, breaks.y, adj.breaks = TRUE, interval.x = FALSE, interval.y = FALSE, f.digits = 2, p.digits = 0, force.digits = FALSE, data, ... )
x , y
|
Unquoted strings identifying the variables whose joint
distribution has to be analysed. |
freq |
A character vector specifying the set of frequencies
to be displayed (more options are allowed). Allowed options
(possibly abbreviated) are |
freq.type |
A character vector specifying the types of
frequencies to be displayed (more types are allowed).
Allowed options are |
total |
Logical value indicating whether the sum of the requested
frequencies should be added to the table; default to |
breaks.x , breaks.y
|
Allow classifying the variables |
adj.breaks |
Logical value indicating whether the endpoints of
intervals of a numerical variable ( |
interval.x , interval.y
|
Logical values indicating whether
|
f.digits , p.digits
|
Integer values specifying the number of
decimals used to round respectively proportions
(default: |
force.digits |
Logical value indicating whether proportions and
percentages should be forcedly rounded to the number of decimals specified in
|
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A list whose elements are the requested tables (converted to dataframes) listing the values taken by the two variables arranged in standard order (logical, alphabetical or numerical order for vectors, order of levels for factors, ordered intervals for classified variables or for variables measured in classes) and the specified joint or conditional types of frequencies.
Raffaella Piccarreta [email protected]
distr.plot.xy()
for plotting a bivariate
distribution.
distr.table.x()
for tabulating a univariate
distribution.
distr.plot.x()
for plotting a univariate
distribution.
data(MktDATA, package = "UBStats") # Character vectors, factors, and discrete numeric vectors # - Default: joint counts distr.table.xy(LikeMost, Children, data = MktDATA) # - Joint and conditional distribution of x|y # counts and proportions, no totals distr.table.xy(LikeMost, Education, freq = c("counts","Prop"), freq.type = c("joint","x|y"), total = FALSE, data = MktDATA) # - Joint and conditional row and column distributions (%) distr.table.xy(CustClass, Children, freq = "Percentages", freq.type = c("joint","row","column"), data = MktDATA) # Numerical variables classified or measured in classes # - A numerical variable classified into intervals # and a factor distr.table.xy(CustClass, TotPurch, breaks.y = c(0,5,10,15,20,35), freq = c("Counts","Prop"), freq.type = "y|x", data = MktDATA) # - Two numerical variables, one measured in classes # and the other classified into intervals distr.table.xy(Income.S, TotPurch, interval.x = TRUE, breaks.y = c(0,5,10,15,20,35), freq = c("Counts","Prop"), freq.type = c("row","col"), data = MktDATA) # Argument force.digits # - Default: manages possible excess of rounding distr.table.xy(CustClass, Children, freq = "Percentages", freq.type = c("x|y"),data = MktDATA) # - Force to the required rounding distr.table.xy(CustClass, Children, freq = "Percentages", freq.type = c("x|y"), force.digits = TRUE, data = MktDATA) # Output the list with the requested tables tables.xy<-distr.table.xy(Income.S, TotPurch, interval.x = TRUE, breaks.y = c(0,5,10,15,20,35), freq = c("Counts","Prop"), freq.type = c("joint","row","col"), data = MktDATA)
data(MktDATA, package = "UBStats") # Character vectors, factors, and discrete numeric vectors # - Default: joint counts distr.table.xy(LikeMost, Children, data = MktDATA) # - Joint and conditional distribution of x|y # counts and proportions, no totals distr.table.xy(LikeMost, Education, freq = c("counts","Prop"), freq.type = c("joint","x|y"), total = FALSE, data = MktDATA) # - Joint and conditional row and column distributions (%) distr.table.xy(CustClass, Children, freq = "Percentages", freq.type = c("joint","row","column"), data = MktDATA) # Numerical variables classified or measured in classes # - A numerical variable classified into intervals # and a factor distr.table.xy(CustClass, TotPurch, breaks.y = c(0,5,10,15,20,35), freq = c("Counts","Prop"), freq.type = "y|x", data = MktDATA) # - Two numerical variables, one measured in classes # and the other classified into intervals distr.table.xy(Income.S, TotPurch, interval.x = TRUE, breaks.y = c(0,5,10,15,20,35), freq = c("Counts","Prop"), freq.type = c("row","col"), data = MktDATA) # Argument force.digits # - Default: manages possible excess of rounding distr.table.xy(CustClass, Children, freq = "Percentages", freq.type = c("x|y"),data = MktDATA) # - Force to the required rounding distr.table.xy(CustClass, Children, freq = "Percentages", freq.type = c("x|y"), force.digits = TRUE, data = MktDATA) # Output the list with the requested tables tables.xy<-distr.table.xy(Income.S, TotPurch, interval.x = TRUE, breaks.y = c(0,5,10,15,20,35), freq = c("Counts","Prop"), freq.type = c("joint","row","col"), data = MktDATA)
LM.output()
Provides fitted values, residuals and other basic
quantities used to check the quality of regression fits.
LM.output(object, data)
LM.output(object, data)
object |
An object returned by function lm. |
data |
An optional data frame, namely the one possibly specified in the call of function lm. |
A dataframe containing the variables in the model
and the model's fitted values, residuals
and influence statistics, merged with the dataframe
specified in the call of function lm, or with the
dataframe possibly specified in data
(if it is consistent with the model's output)
Raffaella Piccarreta [email protected]
data(MktDATA, package = "UBStats") # Model and output based on a given dataframe mod1 <- lm(TotVal ~ Baseline + Kids + Age, data = MktDATA) # Equivalent calls (since data is specified in lm()) mod1_out <- LM.output(mod1, data = MktDATA) dim(mod1_out) mod1_out <- LM.output(mod1) dim(mod1_out) # same as above # Model based on a dataframe's columns mod2 <- lm(MktDATA$TotVal ~ MktDATA$Baseline + MktDATA$Kids + MktDATA$Age) mod2_out <- LM.output(mod2) # note: colnames in mod2_out colnames(mod2_out) # note that the dataframe in 'data' is not considered # as compatible, because the names of columns differ mod2_out <- LM.output(mod2, data = MktDATA)
data(MktDATA, package = "UBStats") # Model and output based on a given dataframe mod1 <- lm(TotVal ~ Baseline + Kids + Age, data = MktDATA) # Equivalent calls (since data is specified in lm()) mod1_out <- LM.output(mod1, data = MktDATA) dim(mod1_out) mod1_out <- LM.output(mod1) dim(mod1_out) # same as above # Model based on a dataframe's columns mod2 <- lm(MktDATA$TotVal ~ MktDATA$Baseline + MktDATA$Kids + MktDATA$Age) mod2_out <- LM.output(mod2) # note: colnames in mod2_out colnames(mod2_out) # note that the dataframe in 'data' is not considered # as compatible, because the names of columns differ mod2_out <- LM.output(mod2, data = MktDATA)
This dataset is a modification of the original MktDATA.Orig
dataset and it is provided for user convenience.
data(MktDATA)
data(MktDATA)
A data frame with 2224 observations and 26 variables.
This dataset contains the variables from a survey on a set of customers of a company operating in the retail food sector. The company sells products from 3 major categories (referred to as A, B, C). The customers can order and acquire products in the company physical stores, or through the company's website (in this case, they can order on the website and pick up the order in one store). Information is collected on customers' activity in the last two years (observation period), as well as some information retrieved through questionnaires or fidelity cards. During such period different marketing strategies were adopted to improve customers' loyalty, and 5 marketing campaigns were launched; a last campaign was launched at the end of the observation period.
data(MktDATA.Orig)
data(MktDATA.Orig)
A data frame with 2224 observations and the following 19 variables (levels of the variables listed in alphabetical order):
CustId
(num
): customer's identification label
Gender
(chr
): customer's gender (F
, M
)
Age
(num
): customer's age (in years)
Education
(chr
): customer's level of education (College
, Graduate
,
HighSchool
, Post-Grad
)
Marital_Status
(chr
): customer's marital status
(Divorced
, Married
, Single
, Together
, Widow
)
Children
(num
): number of children in the household
Kids
(num
): number of kids aged less than 12 in the
household
Income
(chr
): customer's income (measured in classes)
Baseline
(num
): index (from 0 to 1) assigned by the marketing dept
indicating how promising the customer was judged at the beginning of
the observation period
LikeMost
(chr
): most frequently bought category in the last two
years (P.A
, P.B
, P.C
)
TotVal
(num
): amount spent in the last 2 years
NPickUp_Purch
(num
): number of purchases made through company's
website and picked up in physical store
NWeb_Purch
(num
): number of purchases made through company's website
and delivered at home
NStore_Purch
(num
): number of purchases made in a physical store
NDeals
(num
): number of products purchased with a discount
CustClass
(chr
): customer's classification (assigned by the marketing
dept) based on past profitability (Bronze
, Gold
, Platinum
,
Silver
)
PastCampaigns
(num
): number of offers accepted by the customer in the
last 2 years' marketing campaigns
LastCampaign
(num
): binary variable (0/1) indicating whether (1) or
not (0) the customer accepted the offer in the campaign launched at
the end of the observation period
WouldSuggest
(chr
): variable signalling whether (Yes
) or not
(No
) the customer declared they would suggest the company's products
to friends and family
The data set has been adapted from https://www.kaggle.com/code/dmitryuarov/customers-clustering-eda.
summaries.plot.x()
plots location statistics for a
numeric vector conditioned to the levels of one or more variables.
summaries.plot.x( x, stats = "mean", plot.type = "bars", conf.level = 0.95, by1, by2, breaks.by1, interval.by1 = FALSE, breaks.by2, interval.by2 = FALSE, adj.breaks = TRUE, bw = FALSE, color = NULL, legend = TRUE, use.scientific = FALSE, data, ... )
summaries.plot.x( x, stats = "mean", plot.type = "bars", conf.level = 0.95, by1, by2, breaks.by1, interval.by1 = FALSE, breaks.by2, interval.by2 = FALSE, adj.breaks = TRUE, bw = FALSE, color = NULL, legend = TRUE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying a numerical variable whose
tendency measures have to be graphically displayed.
|
stats |
A single character string specifying the conditioned
tendency measure/s to
display in the plot. The available options are |
plot.type |
A single character string specifying the type of plot
used to compare the requested measures conditioned to the levels
of one variable,
|
conf.level |
A number between 0 and 1 indicating the
confidence level of the intervals for the conditional means
when |
by1 , by2
|
Unquoted strings identifying variables
(typically taking few values/levels) used to build conditional
summaries, which can be defined in the same way as |
breaks.by1 , breaks.by2
|
Allow classifying the variables
|
interval.by1 , interval.by2
|
Logical values indicating
whether |
adj.breaks |
Logical value indicating whether the endpoints of
intervals of the numerical variables |
bw |
Logical value indicating whether plots should be colored
in greyscale ( |
color |
Optional string vector to specify colors
to use in the plot rather than a standard palette
( |
legend |
Logical value indicating whether a legend should be displayed
in the plot ( |
use.scientific |
Logical value indicating whether numbers on
axes should be displayed using scientific notation
( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table (converted to dataframe) reporting the requested statistics conditioned to the levels of the specified layers.
Raffaella Piccarreta [email protected]
distr.summary.x()
for tabulating summary
measures of a univariate distribution.
distr.plot.x()
for plotting a univariate
distribution.
distr.table.x()
for tabulating a univariate
distribution.
data(MktDATA, package = "UBStats") # Means (and their CI) or medians by a single variable # - Barplot of means (default) by a character summaries.plot.x(x = TotVal, stats = "mean", by1 = Gender, data = MktDATA) # - Barplot of medians by a numerical variable # classified into intervals: user-defined color summaries.plot.x(x = TotVal, stats = "median", by1 = AOV, breaks.by1 = 5, color = "purple", data = MktDATA) # - Lineplot of means and their CI by a variable # measured in classes summaries.plot.x(x = TotVal, stats = "ci.mean", plot.type = "lines", by1 = Income.S, interval.by1 = TRUE, data = MktDATA) # - Barplot of means and their CI by a # numerical variable; change the confidence level summaries.plot.x(x = TotVal, stats = "ci.mean", conf.level = 0.90, plot.type = "bars", by1 = NWeb_Purch, data = MktDATA) # - Note: no plot built for a variable with # too many levels (>20) # summaries.plot.x(x = TotVal, # stats = "ci.mean", plot.type = "lines", # by1 = AOV, data = MktDATA) # Quantiles by a single variable # - Only lines plots allowed for quantiles summaries.plot.x(x = Baseline, stats = "deciles", plot.type = "lines", by1 = NDeals, data = MktDATA) summaries.plot.x(x = Baseline, stats = "quartiles", plot.type = "lines", by1 = Marital_Status, data = MktDATA) # Means and medians by two variables # - Default: only lines allowed summaries.plot.x(x = TotVal, stats = "mean", by1 = Education, by2 = Kids, data = MktDATA) summaries.plot.x(x = TotVal, stats = "median", by1 = Income.S, by2 = Gender, interval.by1 = TRUE, data = MktDATA) summaries.plot.x(x = Baseline, stats = "mean", by1 = CustClass, by2 = AOV, breaks.by2 = 5, data = MktDATA) # - "ci.mean" not allowed with two layers CustClass_Kids<-paste0(MktDATA$CustClass,"-",MktDATA$Kids) summaries.plot.x(x = Baseline, stats = "ci.mean", conf.level = 0.99, by1 = CustClass_Kids, color = "gold", data = MktDATA) # Arguments adj.breaks and use.scientific # Variables with a very wide range LargeX<-MktDATA$TotVal*1000000 
LargeBY<-MktDATA$AOV*5000000 # - Default: no scientific notation summaries.plot.x(LargeX, plot.type = "bars", by1=LargeBY, breaks.by1 = 5, data = MktDATA) # - Scientific notation for summaries (axes) summaries.plot.x(LargeX, plot.type = "lines", by1=LargeBY, breaks.by1 = 5, use.scientific = TRUE, data = MktDATA) # - Scientific notation for intervals endpoints summaries.plot.x(LargeX, stats = "ci.mean", plot.type = "lines", by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, data = MktDATA) # - Scientific notation for intervals endpoints and summaries summaries.plot.x(LargeX, stats = "quartiles", plot.type = "lines", by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA) # Output the table with the requested summaries Out_TotVal<-summaries.plot.x(x = TotVal, stats = "ci.mean", by1 = Education, data = MktDATA)
data(MktDATA, package = "UBStats") # Means (and their CI) or medians by a single variable # - Barplot of means (default) by a character summaries.plot.x(x = TotVal, stats = "mean", by1 = Gender, data = MktDATA) # - Barplot of medians by a numerical variable # classified into intervals: user-defined color summaries.plot.x(x = TotVal, stats = "median", by1 = AOV, breaks.by1 = 5, color = "purple", data = MktDATA) # - Lineplot of means and their CI by a variable # measured in classes summaries.plot.x(x = TotVal, stats = "ci.mean", plot.type = "lines", by1 = Income.S, interval.by1 = TRUE, data = MktDATA) # - Barplot of means and their CI by a # numerical variable; change the confidence level summaries.plot.x(x = TotVal, stats = "ci.mean", conf.level = 0.90, plot.type = "bars", by1 = NWeb_Purch, data = MktDATA) # - Note: no plot built for a variable with # too many levels (>20) # summaries.plot.x(x = TotVal, # stats = "ci.mean", plot.type = "lines", # by1 = AOV, data = MktDATA) # Quantiles by a single variable # - Only lines plots allowed for quantiles summaries.plot.x(x = Baseline, stats = "deciles", plot.type = "lines", by1 = NDeals, data = MktDATA) summaries.plot.x(x = Baseline, stats = "quartiles", plot.type = "lines", by1 = Marital_Status, data = MktDATA) # Means and medians by two variables # - Default: only lines allowed summaries.plot.x(x = TotVal, stats = "mean", by1 = Education, by2 = Kids, data = MktDATA) summaries.plot.x(x = TotVal, stats = "median", by1 = Income.S, by2 = Gender, interval.by1 = TRUE, data = MktDATA) summaries.plot.x(x = Baseline, stats = "mean", by1 = CustClass, by2 = AOV, breaks.by2 = 5, data = MktDATA) # - "ci.mean" not allowed with two layers CustClass_Kids<-paste0(MktDATA$CustClass,"-",MktDATA$Kids) summaries.plot.x(x = Baseline, stats = "ci.mean", conf.level = 0.99, by1 = CustClass_Kids, color = "gold", data = MktDATA) # Arguments adj.breaks and use.scientific # Variables with a very wide range LargeX<-MktDATA$TotVal*1000000 
LargeBY<-MktDATA$AOV*5000000 # - Default: no scientific notation summaries.plot.x(LargeX, plot.type = "bars", by1=LargeBY, breaks.by1 = 5, data = MktDATA) # - Scientific notation for summaries (axes) summaries.plot.x(LargeX, plot.type = "lines", by1=LargeBY, breaks.by1 = 5, use.scientific = TRUE, data = MktDATA) # - Scientific notation for intervals endpoints summaries.plot.x(LargeX, stats = "ci.mean", plot.type = "lines", by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, data = MktDATA) # - Scientific notation for intervals endpoints and summaries summaries.plot.x(LargeX, stats = "quartiles", plot.type = "lines", by1=LargeBY, breaks.by1 = 5, adj.breaks = FALSE, use.scientific = TRUE, data = MktDATA) # Output the table with the requested summaries Out_TotVal<-summaries.plot.x(x = TotVal, stats = "ci.mean", by1 = Education, data = MktDATA)
TEST.diffmean()
tests hypotheses on the difference between the
means of two independent or paired populations.
TEST.diffmean( x, y, type = "independent", mdiff0 = 0, alternative = "two.sided", sigma.x = NULL, sigma.y = NULL, by, sigma.by = NULL, sigma.d = NULL, var.test = FALSE, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
TEST.diffmean( x, y, type = "independent", mdiff0 = 0, alternative = "two.sided", sigma.x = NULL, sigma.y = NULL, by, sigma.by = NULL, sigma.d = NULL, var.test = FALSE, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x , y
|
Unquoted strings identifying the numeric
variables with the same length whose means have to be compared. |
type |
A length-one character vector specifying the type of samples.
Allowed values are |
mdiff0 |
Numeric value that specifies the null hypothesis to test for (default is 0). |
alternative |
A length-one character vector specifying the direction
of the alternative hypothesis. Allowed values are |
sigma.x , sigma.y
|
Optional numeric values specifying
the possibly known populations' standard deviations
(when |
by |
Optional unquoted string, available only when
|
sigma.by |
Optional numeric value specifying the possibly known
standard deviations for the two independent samples identified via
|
sigma.d |
Optional numeric value specifying the possibly known standard deviation of the difference when samples are paired. |
var.test |
Logical value indicating whether to run a test on the equality of variance for two (independent) samples or not (default). |
digits |
Integer value specifying the number of
decimals used to round statistics; default to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcedly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the results of the test on the difference between the populations' means. For independent samples in the case of unknown variances the test is run both under the assumption that the variances are equal and under the assumption that they differ, using percentiles from both the normal and the Student's t distribution.
Raffaella Piccarreta [email protected]
CI.diffmean()
to build confidence intervals for
the difference between two populations' means.
data(MktDATA, package = "UBStats") # Independent samples (default type), UNKNOWN variances # Bilateral test on difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 0) # - Using x,by: groups identified by ordered levels of by TEST.diffmean(x = AOV, by = Gender, mdiff0 = 0, data = MktDATA) # Since order is F, M, hypothesis are on mean(F) - mean(M) # To test hypotheses on mean(M) - mean(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffmean(x = AOV, by = Gender.R , mdiff0 = 0, data = MktDATA) # - Testing also hypotheses on equality of unknown variances TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 0, var.test = TRUE) # - Output results: test on differences out.test_diffM<-TEST.diffmean(x = AOV_M, y = AOV_F) # - Output results: list with both test on means and variances out.test_diffM.V<-TEST.diffmean(x = AOV_M, y = AOV_F, var.test = TRUE) # Independent samples (default type), KNOWN variances # Test hypotheses on the difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 10, alternative = "greater", sigma.x = 10, sigma.y = 20) # - Using x,by: groups identified by ordered levels of by # Adjust considering the ordering of levels TEST.diffmean(x = AOV, by = Gender, mdiff0 = -10, alternative = "less", sigma.by = c("M" = 10, "F"=20), data = MktDATA) # To change the sign, order levels as desired Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffmean(x = AOV, by = Gender.R, mdiff0 = 10, alternative = "greater", sigma.by = c("M" = 10, "F"=20), data = MktDATA) # - Output results out.test_diffM<-TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 10, alternative = "greater", sigma.x = 10, 
sigma.y = 20) # Paired samples: UNKNOWN variances # - Default settings TEST.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", mdiff0 = 1.5, alternative = "greater", data=MktDATA) # Paired: KNOWN variances TEST.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", mdiff0 = 1.5, alternative = "greater", sigma.d = 2, data = MktDATA) # - Output results out.test_diffM<-TEST.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", mdiff0 = 1.5, alternative = "greater", sigma.d = 2, data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/50000 SmallX_M <- SmallX[MktDATA$Gender == "M"] SmallX_F <- SmallX[MktDATA$Gender == "F"] # - Default output TEST.diffmean(x = SmallX_M, y = SmallX_F) # - Request to use the exact number of digits (default, 2) TEST.diffmean(x = SmallX_M, y = SmallX_F, force.digits = TRUE) # - Request to allow scientific notation TEST.diffmean(x = SmallX_M, y = SmallX_F, use.scientific = TRUE)
data(MktDATA, package = "UBStats") # Independent samples (default type), UNKNOWN variances # Bilateral test on difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 0) # - Using x,by: groups identified by ordered levels of by TEST.diffmean(x = AOV, by = Gender, mdiff0 = 0, data = MktDATA) # Since order is F, M, hypothesis are on mean(F) - mean(M) # To test hypotheses on mean(M) - mean(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffmean(x = AOV, by = Gender.R , mdiff0 = 0, data = MktDATA) # - Testing also hypotheses on equality of unknown variances TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 0, var.test = TRUE) # - Output results: test on differences out.test_diffM<-TEST.diffmean(x = AOV_M, y = AOV_F) # - Output results: list with both test on means and variances out.test_diffM.V<-TEST.diffmean(x = AOV_M, y = AOV_F, var.test = TRUE) # Independent samples (default type), KNOWN variances # Test hypotheses on the difference between means of males and females # - Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 10, alternative = "greater", sigma.x = 10, sigma.y = 20) # - Using x,by: groups identified by ordered levels of by # Adjust considering the ordering of levels TEST.diffmean(x = AOV, by = Gender, mdiff0 = -10, alternative = "less", sigma.by = c("M" = 10, "F"=20), data = MktDATA) # To change the sign, order levels as desired Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffmean(x = AOV, by = Gender.R, mdiff0 = 10, alternative = "greater", sigma.by = c("M" = 10, "F"=20), data = MktDATA) # - Output results out.test_diffM<-TEST.diffmean(x = AOV_M, y = AOV_F, mdiff0 = 10, alternative = "greater", sigma.x = 10, 
sigma.y = 20) # Paired samples: UNKNOWN variances # - Default settings TEST.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", mdiff0 = 1.5, alternative = "greater", data=MktDATA) # Paired: KNOWN variances TEST.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", mdiff0 = 1.5, alternative = "greater", sigma.d = 2, data = MktDATA) # - Output results out.test_diffM<-TEST.diffmean(x = NStore_Purch, y = NWeb_Purch, type = "paired", mdiff0 = 1.5, alternative = "greater", sigma.d = 2, data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/50000 SmallX_M <- SmallX[MktDATA$Gender == "M"] SmallX_F <- SmallX[MktDATA$Gender == "F"] # - Default output TEST.diffmean(x = SmallX_M, y = SmallX_F) # - Request to use the exact number of digits (default, 2) TEST.diffmean(x = SmallX_M, y = SmallX_F, force.digits = TRUE) # - Request to allow scientific notation TEST.diffmean(x = SmallX_M, y = SmallX_F, use.scientific = TRUE)
TEST.diffprop()
tests hypotheses on the difference between the
proportion of successes in two independent populations.
TEST.diffprop( x, y, success.x = NULL, success.y = NULL, pdiff0 = 0, alternative = "two.sided", by, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
TEST.diffprop( x, y, success.x = NULL, success.y = NULL, pdiff0 = 0, alternative = "two.sided", by, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x , y
|
Unquoted strings identifying the variables of
interest. |
success.x , success.y
|
If |
pdiff0 |
Numeric value that specifies the null hypothesis to test for (default is 0). |
alternative |
A length-one character vector specifying the direction
of the alternative hypothesis. Allowed values are |
by |
Optional unquoted string identifying a variable
(of any type), defined in the same way as |
digits |
Integer value specifying the number of
decimals used to round statistics; default to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcedly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the results of the test on the difference between the proportions of successes in two independent populations.
Raffaella Piccarreta [email protected]
CI.diffprop()
to build confidence intervals for
the difference between two populations' proportions of successes.
data(MktDATA, package = "UBStats") # Proportions of success defined on non-binary and # non-logical vectors; 'success' coded same way # for both vectors # - Using x,y: build vectors with data on the two groups WouldSuggest_F <- MktDATA$WouldSuggest[MktDATA$Gender == "F"] WouldSuggest_M <- MktDATA$WouldSuggest[MktDATA$Gender == "M"] TEST.diffprop(x = WouldSuggest_M, y = WouldSuggest_F, success.x = "Yes", pdiff0 = 0.1, alternative = "less") PastCampaigns_F<-MktDATA$PastCampaigns[MktDATA$Gender=="F"] PastCampaigns_M<-MktDATA$PastCampaigns[MktDATA$Gender=="M"] TEST.diffprop(x = PastCampaigns_M, y = PastCampaigns_F, success.x = 0, pdiff0 = 0.2) # - Using x,by: groups identified by ordered levels of by TEST.diffprop(x = PastCampaigns, by = Gender, success.x=0, pdiff0 = 0.2, data = MktDATA) # Since order is F, M, test is on prop(F) - prop(M) # To get the interval for prop(M) - prop(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffprop(x = PastCampaigns, by = Gender.R, success.x=0, pdiff0 = 0.2, data = MktDATA) # Proportions of success defined based on # binary or logical vectors; 'success' # coded same way for both vectors # - Binary variable (success=1): based on x,y LastCampaign_F<-MktDATA$LastCampaign[MktDATA$Gender=="F"] LastCampaign_M<-MktDATA$LastCampaign[MktDATA$Gender=="M"] TEST.diffprop(x = LastCampaign_M, y = LastCampaign_F) # - Binary variable (success=1): based on x,y # see above for recoding of levels of Gender TEST.diffprop(x = LastCampaign, by = Gender, data = MktDATA) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffprop(x = LastCampaign, by = Gender.R, data = MktDATA) # - Logical variable (success=TRUE): based on x,y Deals_w_child <- MktDATA$Deals.ge50[MktDATA$Children>0] Deals_no_child <- MktDATA$Deals.ge50[MktDATA$Children==0] TEST.diffprop(x = Deals_w_child, y = Deals_no_child, pdiff0 = 0.2, alternative = "less",) # Proportions defined on # non-binary and non-logical vectors, with 'success' # coded differently (only 
specification x,y is reasonable here) WouldSuggest_Other<-c(rep("OK",310),rep("KO",650-310)) TEST.diffprop(x = WouldSuggest, y = WouldSuggest_Other, success.x = "Yes", success.y = "OK", pdiff0 = 0.1, alternative = "greater", data = MktDATA) # Proportions based on combined conditions # - Build logical vector/s indicating whether a condition # is satisfied IsTop<-MktDATA$AOV>80 IsTop_OK<-IsTop[MktDATA$WouldSuggest == "Yes"] IsTop_KO<-IsTop[MktDATA$WouldSuggest == "No"] TEST.diffprop(x = IsTop_OK, y = IsTop_KO, pdiff0 = 0.05, alternative = "greater") Deals<-MktDATA$NDeals>=5 Deals_Married <- Deals[MktDATA$Marital_Status=="Married" & MktDATA$Children==0] Deals_Single <- Deals[MktDATA$Marital_Status=="Single"] TEST.diffprop(x = Deals_Married, y = Deals_Single, alternative = "less") # Output results out.test_diffP<-TEST.diffprop(x = Deals_Married, y = Deals_Single, alternative = "less") # Arguments force.digits and use.scientific # An input variable taking very low values HighAOV <- MktDATA$AOV>150 # - Default: manages possible excess of rounding TEST.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"]) # - Force to the exact number of digits (default, 2) TEST.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], force.digits = TRUE) # - Allow scientific notation TEST.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], use.scientific = TRUE)
data(MktDATA, package = "UBStats") # Proportions of success defined on non-binary and # non-logical vectors; 'success' coded same way # for both vectors # - Using x,y: build vectors with data on the two groups WouldSuggest_F <- MktDATA$WouldSuggest[MktDATA$Gender == "F"] WouldSuggest_M <- MktDATA$WouldSuggest[MktDATA$Gender == "M"] TEST.diffprop(x = WouldSuggest_M, y = WouldSuggest_F, success.x = "Yes", pdiff0 = 0.1, alternative = "less") PastCampaigns_F<-MktDATA$PastCampaigns[MktDATA$Gender=="F"] PastCampaigns_M<-MktDATA$PastCampaigns[MktDATA$Gender=="M"] TEST.diffprop(x = PastCampaigns_M, y = PastCampaigns_F, success.x = 0, pdiff0 = 0.2) # - Using x,by: groups identified by ordered levels of by TEST.diffprop(x = PastCampaigns, by = Gender, success.x=0, pdiff0 = 0.2, data = MktDATA) # Since order is F, M, test is on prop(F) - prop(M) # To get the interval for prop(M) - prop(F) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffprop(x = PastCampaigns, by = Gender.R, success.x=0, pdiff0 = 0.2, data = MktDATA) # Proportions of success defined based on # binary or logical vectors; 'success' # coded same way for both vectors # - Binary variable (success=1): based on x,y LastCampaign_F<-MktDATA$LastCampaign[MktDATA$Gender=="F"] LastCampaign_M<-MktDATA$LastCampaign[MktDATA$Gender=="M"] TEST.diffprop(x = LastCampaign_M, y = LastCampaign_F) # - Binary variable (success=1): based on x,y # see above for recoding of levels of Gender TEST.diffprop(x = LastCampaign, by = Gender, data = MktDATA) Gender.R <- factor(MktDATA$Gender, levels = c("M", "F")) TEST.diffprop(x = LastCampaign, by = Gender.R, data = MktDATA) # - Logical variable (success=TRUE): based on x,y Deals_w_child <- MktDATA$Deals.ge50[MktDATA$Children>0] Deals_no_child <- MktDATA$Deals.ge50[MktDATA$Children==0] TEST.diffprop(x = Deals_w_child, y = Deals_no_child, pdiff0 = 0.2, alternative = "less",) # Proportions defined on # non-binary and non-logical vectors, with 'success' # coded differently (only 
specification x,y is reasonable here) WouldSuggest_Other<-c(rep("OK",310),rep("KO",650-310)) TEST.diffprop(x = WouldSuggest, y = WouldSuggest_Other, success.x = "Yes", success.y = "OK", pdiff0 = 0.1, alternative = "greater", data = MktDATA) # Proportions based on combined conditions # - Build logical vector/s indicating whether a condition # is satisfied IsTop<-MktDATA$AOV>80 IsTop_OK<-IsTop[MktDATA$WouldSuggest == "Yes"] IsTop_KO<-IsTop[MktDATA$WouldSuggest == "No"] TEST.diffprop(x = IsTop_OK, y = IsTop_KO, pdiff0 = 0.05, alternative = "greater") Deals<-MktDATA$NDeals>=5 Deals_Married <- Deals[MktDATA$Marital_Status=="Married" & MktDATA$Children==0] Deals_Single <- Deals[MktDATA$Marital_Status=="Single"] TEST.diffprop(x = Deals_Married, y = Deals_Single, alternative = "less") # Output results out.test_diffP<-TEST.diffprop(x = Deals_Married, y = Deals_Single, alternative = "less") # Arguments force.digits and use.scientific # An input variable taking very low values HighAOV <- MktDATA$AOV>150 # - Default: manages possible excess of rounding TEST.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"]) # - Force to the exact number of digits (default, 2) TEST.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], force.digits = TRUE) # - Allow scientific notation TEST.diffprop(x = HighAOV[MktDATA$Gender=="M"], y = HighAOV[MktDATA$Gender=="F"], use.scientific = TRUE)
TEST.diffvar()
tests the hypothesis of equality between the
variances of two independent populations.
TEST.diffvar( x, y, by, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
TEST.diffvar( x, y, by, digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x , y
|
Unquoted strings identifying the numeric
variables with the same length whose variances have to be compared.
|
by |
Optional unquoted string identifying a variable
(of any type), defined in the same way as |
digits |
Integer value specifying the number of
decimals used to round statistics; default to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcedly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the results of the test on the difference between the variances of two independent populations.
Raffaella Piccarreta [email protected]
CI.diffmean()
to build confidence intervals for
the difference between two populations' means.
TEST.diffmean()
to test hypotheses on the difference
between two populations' means.
data(MktDATA, package = "UBStats") # Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] TEST.diffvar(x = AOV_M, y = AOV_F) TEST.diffvar(x = AOV_F, y = AOV_M) # same # Using x,by: groups identified by ordered levels of by TEST.diffvar(x = AOV, by = Gender, data=MktDATA) # Output results out_test.diffV<-TEST.diffvar(x = AOV_M, y = AOV_F) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/50000 SmallX_M <- SmallX[MktDATA$Gender == "M"] SmallX_F <- SmallX[MktDATA$Gender == "F"] # - Default output TEST.diffvar(x = SmallX_M, y = SmallX_F) # - Request to use the exact number of digits (default, 2) TEST.diffvar(x = SmallX_M, y = SmallX_F, force.digits = TRUE) # - Request to allow scientific notation TEST.diffvar(x = SmallX_M, y = SmallX_F, use.scientific = TRUE)
data(MktDATA, package = "UBStats") # Using x,y: build vectors with data on the two groups AOV_M <- MktDATA$AOV[MktDATA$Gender == "M"] AOV_F <- MktDATA$AOV[MktDATA$Gender == "F"] TEST.diffvar(x = AOV_M, y = AOV_F) TEST.diffvar(x = AOV_F, y = AOV_M) # same # Using x,by: groups identified by ordered levels of by TEST.diffvar(x = AOV, by = Gender, data=MktDATA) # Output results out_test.diffV<-TEST.diffvar(x = AOV_M, y = AOV_F) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/50000 SmallX_M <- SmallX[MktDATA$Gender == "M"] SmallX_F <- SmallX[MktDATA$Gender == "F"] # - Default output TEST.diffvar(x = SmallX_M, y = SmallX_F) # - Request to use the exact number of digits (default, 2) TEST.diffvar(x = SmallX_M, y = SmallX_F, force.digits = TRUE) # - Request to allow scientific notation TEST.diffvar(x = SmallX_M, y = SmallX_F, use.scientific = TRUE)
TEST.mean()
tests hypotheses on the mean of a population.
TEST.mean( x, sigma = NULL, mu0 = 0, alternative = "two.sided", digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
TEST.mean( x, sigma = NULL, mu0 = 0, alternative = "two.sided", digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the numeric
variable whose mean is of interest. |
sigma |
An optional numeric value specifying the
population standard deviation. If |
mu0 |
Numeric value that specifies the null hypothesis to test for (default is 0). |
alternative |
A length-one character vector specifying the direction
of the alternative hypothesis. Allowed values are |
digits |
Integer value specifying the number of
decimals used to round statistics; default to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
|
force.digits |
Logical value indicating whether reported values
should be forcedly rounded to the number of decimals specified in
|
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation ( |
data |
An optional data frame containing |
... |
Additional arguments to be passed to low level functions. |
A table reporting the results of the test on the population mean. If the variance is unknown, the test is run using percentiles from both the normal and the Student's t distribution.
Raffaella Piccarreta [email protected]
CI.mean()
to build confidence intervals for the
population mean.
data(MktDATA, package = "UBStats") # Test on the mean; KNOWN variance # - Bilateral test TEST.mean(NStore_Purch, sigma = 9, mu0 = 5, alternative = "two.sided", data = MktDATA) # - Unilateral test TEST.mean(NStore_Purch, sigma = 9,mu0 = 5, alternative = "greater", data = MktDATA) # Test on the mean; UNKNOWN variance; # - Unilateral test TEST.mean(TotVal, mu0 = 600, alternative = "less", data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/500 # Default output TEST.mean(SmallX, mu0 = 0.1) # Request to use the exact number of digits (default, 2) TEST.mean(SmallX, mu0 = 0.1,force.digits=TRUE) # Request to allow scientific notation TEST.mean(SmallX, mu0 = 0.1,use.scientific=TRUE) # Output results out.test_mean<-TEST.mean(TotVal, mu0 = 600, alternative = "less", data = MktDATA)
data(MktDATA, package = "UBStats") # Test on the mean; KNOWN variance # - Bilateral test TEST.mean(NStore_Purch, sigma = 9, mu0 = 5, alternative = "two.sided", data = MktDATA) # - Unilateral test TEST.mean(NStore_Purch, sigma = 9,mu0 = 5, alternative = "greater", data = MktDATA) # Test on the mean; UNKNOWN variance; # - Unilateral test TEST.mean(TotVal, mu0 = 600, alternative = "less", data = MktDATA) # Arguments force.digits and use.scientific # An input variable taking very low values SmallX<-MktDATA$AOV/500 # Default output TEST.mean(SmallX, mu0 = 0.1) # Request to use the exact number of digits (default, 2) TEST.mean(SmallX, mu0 = 0.1,force.digits=TRUE) # Request to allow scientific notation TEST.mean(SmallX, mu0 = 0.1,use.scientific=TRUE) # Output results out.test_mean<-TEST.mean(TotVal, mu0 = 600, alternative = "less", data = MktDATA)
TEST.prop()
tests hypotheses on the proportion of successes in a
population.
TEST.prop( x, success = NULL, p0 = 0.5, alternative = "two.sided", digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
TEST.prop( x, success = NULL, p0 = 0.5, alternative = "two.sided", digits = 2, force.digits = FALSE, use.scientific = FALSE, data, ... )
x |
An unquoted string identifying the variable of interest.
|
success |
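If x is a character vector, a factor, or a non-binary numeric vector, the value of x that identifies a success. It can be omitted (NULL, default) when x is a binary 0/1 variable (success is 1) or a logical vector (success is TRUE). |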
p0 |
Numeric value that specifies the null hypothesis to test for (default is 0.5). |
alternative |
A length-one character vector specifying the direction
of the alternative hypothesis. Allowed values are "two.sided" (default), "less", or "greater". |
digits |
Integer value specifying the number of
decimals used to round statistics; default to 2. If the chosen rounding formats some
non-zero values as zero, the number of decimals is increased
so that all values have at least one significant digit, unless the argument
force.digits is set to TRUE. |
force.digits |
Logical value indicating whether reported values
should be forcibly rounded to the number of decimals specified in
digits, even if this formats some non-zero values as zero (default is FALSE). |
use.scientific |
Logical value indicating whether numbers
in tables should be displayed using
scientific notation (TRUE) or not (FALSE, default). |
data |
An optional data frame containing x. |
... |
Additional arguments to be passed to low level functions. |
A table reporting the results of the test on the population proportion of successes.
Raffaella Piccarreta [email protected]
CI.prop()
to build confidence intervals for the
population proportion of successes.
data(MktDATA, package = "UBStats") # Success = one value of a character vector or factor # - Bilateral test TEST.prop(WouldSuggest, success = "Yes", p0 = 0.7, data = MktDATA) # - Unilateral test, change digits TEST.prop(Education, success = "Post-Grad", p0 = 0.3, alternative = "less", digits = 4,data = MktDATA) # Success = numeric value; bilateral test TEST.prop(Children, success = 2, p0 = 0.3, data = MktDATA) # Binary variable (success = 1 by default); unilateral TEST.prop(LastCampaign, p0 = 0.1, alternative = "greater", digits = 3, data = MktDATA) # Logical variable (success = TRUE by default); unilateral test TEST.prop(Deals.ge50, p0 = 0.13, alternative = "greater", digits = 3, data = MktDATA) # Success based on combined conditions # - Build a (logical) vector IsTop <- MktDATA$CustClass == "Gold" | MktDATA$CustClass == "Platinum" TEST.prop(IsTop, p0 = 0.2, data = MktDATA) HighAOV <- MktDATA$AOV>150 TEST.prop(HighAOV, p0 = 0.1) TEST.prop(HighAOV, p0 = 0.1, force.digits = TRUE) TEST.prop(HighAOV, p0 = 0.1, use.scientific = TRUE) # Output results out_test_prop<-TEST.prop(IsTop, p0 = 0.2, data = MktDATA)
data(MktDATA, package = "UBStats") # Success = one value of a character vector or factor # - Bilateral test TEST.prop(WouldSuggest, success = "Yes", p0 = 0.7, data = MktDATA) # - Unilateral test, change digits TEST.prop(Education, success = "Post-Grad", p0 = 0.3, alternative = "less", digits = 4,data = MktDATA) # Success = numeric value; bilateral test TEST.prop(Children, success = 2, p0 = 0.3, data = MktDATA) # Binary variable (success = 1 by default); unilateral TEST.prop(LastCampaign, p0 = 0.1, alternative = "greater", digits = 3, data = MktDATA) # Logical variable (success = TRUE by default); unilateral test TEST.prop(Deals.ge50, p0 = 0.13, alternative = "greater", digits = 3, data = MktDATA) # Success based on combined conditions # - Build a (logical) vector IsTop <- MktDATA$CustClass == "Gold" | MktDATA$CustClass == "Platinum" TEST.prop(IsTop, p0 = 0.2, data = MktDATA) HighAOV <- MktDATA$AOV>150 TEST.prop(HighAOV, p0 = 0.1) TEST.prop(HighAOV, p0 = 0.1, force.digits = TRUE) TEST.prop(HighAOV, p0 = 0.1, use.scientific = TRUE) # Output results out_test_prop<-TEST.prop(IsTop, p0 = 0.2, data = MktDATA)