An R Companion for the Handbook of Biological Statistics

Salvatore S. Mangiafico

Repeated G–tests of Goodness-of-Fit

These examples use the G.test function in the RVAideMemoire package, but the GTest function in the DescTools package could be used in the same manner.

When to use it

Null hypothesis

See the Handbook for information on these topics.

How to do the test

Repeated G–tests of goodness-of-fit example

### --------------------------------------------------------------
### Arm crossing example, Repeated G–tests of goodness-of-fit,
###      pp. 91–93
### --------------------------------------------------------------

# Counts of right (R) and left (L) arms for each ethnic group, entered as
#   a whitespace-delimited text table with a header row.
Input =("
Ethnic.group  R    L
Yemen        168  174
Djerba       132  195
Kurdistan    167  204
Libya        162  212
Berber       143  194
Cochin       153  174
")

# Parse the text table into the data frame that every later step in this
#   example refers to as Data.
Data = read.table(text=Input, header=TRUE)

Individual G-tests

library(RVAideMemoire)

# Fun.G: individual G statistic for one row of counts.
#   Q is a named vector holding the "R" and "L" counts. The counts are
#   tested against equal expected proportions (0.5, 0.5) and the
#   statistic element of the G.test result is returned.
#   Companion helpers Fun.df and Fun.p return the df and p-value.
Fun.G = function (Q){
  G.test(x=c(Q["R"], Q["L"]),
         p=c(0.5, 0.5)
         )$statistic
}

# Fun.df: degrees of freedom of the individual G-test for one row.
#   Q is a named vector holding the "R" and "L" counts; the test uses
#   equal expected proportions and the parameter element of the G.test
#   result is returned.
Fun.df = function (Q){
  G.test(x=c(Q["R"], Q["L"]),
         p=c(0.5, 0.5)
         )$parameter
}

# Fun.p: p-value of the individual G-test for one row.
#   Q is a named vector holding the "R" and "L" counts; the test uses
#   equal expected proportions and the p.value element of the G.test
#   result is returned.
Fun.p = function (Q){
  G.test(x=c(Q["R"], Q["L"]),
         p=c(0.5, 0.5)
         )$p.value
}

library(dplyr)

# Append the per-group results to Data: the proportion of right arms,
#   followed by each row's individual G statistic, degrees of freedom,
#   and p-value (computed row by row over the R and L columns).
Data = Data %>%
  mutate(
    Prop.R  = R / (R + L),
    G       = apply(Data[c("R", "L")], 1, Fun.G),
    df      = apply(Data[c("R", "L")], 1, Fun.df),
    p.Value = apply(Data[c("R", "L")], 1, Fun.p)
  )

Data

Ethnic.group   R   L    Prop.R          G df     p.Value

1        Yemen 168 174 0.4912281  0.1052686  1 0.745596489

2       Djerba 132 195 0.4036697 12.2138397  1 0.000474363

3    Kurdistan 167 204 0.4501348  3.6961684  1 0.054537574

4        Libya 162 212 0.4331551  6.7045477  1 0.009616732

5       Berber 143 194 0.4243323  7.7478346  1 0.005377698

6       Cochin 153 174 0.4678899  1.3495524  1 0.245356383

Heterogeneity G-test

# The heterogeneity G-test is run on a matrix, so take the R and L
#   count columns from the data frame and convert them, then display it.
Data.matrix = as.matrix(Data[, c("R", "L")])

Data.matrix

R   L

[1,] 168 174

[2,] 132 195

[3,] 167 204

[4,] 162 212

[5,] 143 194

[6,] 153 174

G.test(x=Data.matrix)                           # Heterogeneity G-test on
                                                #   the matrix of counts

G-test

G = 6.7504, df = 5, p-value = 0.2399

Pooled G-test

Total.R = sum(Data$R)                           # Set up data for pooled
Total.L = sum(Data$L)                           #   G-test

observed = c(Total.R, Total.L)                  # Counts summed over rows
expected = c(0.5, 0.5)                          # Expected proportions

G.test(x=observed,                              # Pooled
       p=expected)

G-test for given probabilities

G = 25.0668, df = 1, p-value = 5.538e-07

Total G-test

Total.G  = sum(Data$G)                          # Set up data for total
                                                #   G-test: sum of the
Total.df = sum(Data$df)                         #   individual G's and df's

Total.G                                         # Total

[1] 31.81721

Total.df

[1] 6

# Upper-tail chi-square probability for the total G at the total df
pchisq(q=Total.G, df=Total.df, lower.tail=FALSE)

[1] 1.768815e-05

#     #     #

Example

Repeated G–tests of goodness-of-fit example

### --------------------------------------------------------------
### Drosophila example, Repeated G–tests of goodness-of-fit,
###      p. 93
### --------------------------------------------------------------

# D and S counts for each trial, entered as a whitespace-delimited text
#   table with a header row; trial labels are quoted because they
#   contain a space.
Input =("
Trial       D    S
'Trial 1'   296  366
'Trial 2'    78   72
'Trial 3'   417  467
")

# Parse the text table into the data frame that every later step in this
#   example refers to as Data.
Data = read.table(text=Input, header=TRUE)

Individual G-tests

library(RVAideMemoire)

# Fun.G: individual G statistic for one row of counts.
#   Q is a named vector holding the "D" and "S" counts. The counts are
#   tested against equal expected proportions (0.5, 0.5) and the
#   statistic element of the G.test result is returned.
#   Companion helpers Fun.df and Fun.p return the df and p-value.
Fun.G = function (Q){
  G.test(x=c(Q["D"], Q["S"]),
         p=c(0.5, 0.5)
         )$statistic
}

# Fun.df: degrees of freedom of the individual G-test for one row.
#   Q is a named vector holding the "D" and "S" counts; the test uses
#   equal expected proportions and the parameter element of the G.test
#   result is returned.
Fun.df = function (Q){
  G.test(x=c(Q["D"], Q["S"]),
         p=c(0.5, 0.5)
         )$parameter
}

# Fun.p: p-value of the individual G-test for one row.
#   Q is a named vector holding the "D" and "S" counts; the test uses
#   equal expected proportions and the p.value element of the G.test
#   result is returned.
Fun.p = function (Q){
  G.test(x=c(Q["D"], Q["S"]),
         p=c(0.5, 0.5)
         )$p.value
}

library(dplyr)

# Append each trial's individual G statistic, degrees of freedom, and
#   p-value to Data (computed row by row over the D and S columns).
Data = Data %>%
  mutate(
    G       = apply(Data[c("D", "S")], 1, Fun.G),
    df      = apply(Data[c("D", "S")], 1, Fun.df),
    p.Value = apply(Data[c("D", "S")], 1, Fun.p)
  )

Data

Trial   D   S        G df    p.Value

1 Trial 1 296 366 7.415668  1 0.00646583

2 Trial 2  78  72 0.240064  1 0.62415986

3 Trial 3 417 467 2.829564  1 0.09254347

Heterogeneity G-test

# The heterogeneity G-test is run on a matrix, so take the D and S
#   count columns from the data frame and convert them, then display it.
Data.matrix = as.matrix(Data[, c("D", "S")])

Data.matrix

D   S

[1,] 296 366

[2,]  78  72

[3,] 417 467

G.test(x=Data.matrix)                           # Heterogeneity G-test on
                                                #   the matrix of counts

G-test

G = 2.8168, df = 2, p-value = 0.2445

Pooled G-test

Total.D = sum(Data$D)                           # Set up data for pooled
Total.S = sum(Data$S)                           #   G-test

observed = c(Total.D, Total.S)                  # Counts summed over rows
expected = c(0.5, 0.5)                          # Expected proportions

G.test(x=observed,                              # Pooled
       p=expected)

G-test for given probabilities

G = 7.6685, df = 1, p-value = 0.005619

Total G-test

Total.G  = sum(Data$G)                          # Set up data for total
                                                #   G-test: sum of the
Total.df = sum(Data$df)                         #   individual G's and df's

Total.G                                         # Total

[1] 10.4853

Total.df

[1] 3

# Upper-tail chi-square probability for the total G at the total df
pchisq(q=Total.G, df=Total.df, lower.tail=FALSE)

[1] 0.01486097

#     #     #

Similar tests

See the Handbook for information on these topics.