[banner]

An R Companion for the Handbook of Biological Statistics

Salvatore S. Mangiafico

Repeated G–tests of Goodness-of-Fit

These examples use the G.test function in the RVAideMemoire package, but the GTest function in the DescTools package could be used in the same manner.

 

When to use it

Null hypothesis

See the Handbook for information on these topics.

 

How to do the test

Repeated G–tests of goodness-of-fit example

 

### --------------------------------------------------------------
### Arm crossing example, Repeated G–tests of goodness-of-fit,
###      pp. 91–93
### --------------------------------------------------------------

### Arm-crossing counts (columns R and L) for each ethnic group, entered
###   as an inline whitespace-delimited table with a header row.
Input =("
Ethnic.group  R    L
 Yemen        168  174
 Djerba       132  195
 Kurdistan    167  204
 Libya        162  212
 Berber       143  194
 Cochin       153  174
")

### Parse the inline table into a data frame.  Passing the string through
###   text= lets read.table open and close its own connection, so no
###   text connection is left open afterwards.
Data = read.table(text=Input, header=TRUE)

 

 

Individual G-tests

 

library(RVAideMemoire)

# Fun.G: G statistic of a goodness-of-fit G-test for one row of counts.
#   Q is a named vector holding elements "R" and "L"; the two counts are
#   tested against expected proportions of 0.5 and 0.5.
Fun.G = function (Q){
  counts = c(Q["R"], Q["L"])
  fit = G.test(x=counts, p=c(0.5, 0.5))
  fit$statistic
}

# Fun.df: degrees of freedom of the same goodness-of-fit G-test.
#   Q is a named vector holding elements "R" and "L"; the two counts are
#   tested against expected proportions of 0.5 and 0.5.
Fun.df = function (Q){
  counts = c(Q["R"], Q["L"])
  fit = G.test(x=counts, p=c(0.5, 0.5))
  fit$parameter
}

# Fun.p: p-value of the same goodness-of-fit G-test.
#   Q is a named vector holding elements "R" and "L"; the two counts are
#   tested against expected proportions of 0.5 and 0.5.
Fun.p = function (Q){
  counts = c(Q["R"], Q["L"])
  fit = G.test(x=counts, p=c(0.5, 0.5))
  fit$p.value
}


library(dplyr)

# Add the proportion of right arms and, row by row, the G statistic,
#   degrees of freedom, and p-value of the individual G-tests.
#   cbind(R, L) builds the matrix of counts that apply() walks over;
#   its column names "R" and "L" are what Fun.G, Fun.df and Fun.p index.
Data = Data %>%
  mutate(Prop.R  = R / (R + L),
         G       = apply(cbind(R, L), 1, Fun.G),
         df      = apply(cbind(R, L), 1, Fun.df),
         p.Value = apply(cbind(R, L), 1, Fun.p))

Data

 

  Ethnic.group   R   L    Prop.R          G df     p.Value

1        Yemen 168 174 0.4912281  0.1052686  1 0.745596489

2       Djerba 132 195 0.4036697 12.2138397  1 0.000474363

3    Kurdistan 167 204 0.4501348  3.6961684  1 0.054537574

4        Libya 162 212 0.4331551  6.7045477  1 0.009616732

5       Berber 143 194 0.4243323  7.7478346  1 0.005377698

6       Cochin 153 174 0.4678899  1.3495524  1 0.245356383

 

 

Heterogeneity G-test

 

# The heterogeneity G-test is run on a matrix of the R and L counts,
#   one row per group, so pull those two columns out as a matrix.
Data.matrix = as.matrix(Data[, c("R", "L")])

Data.matrix

 

       R   L

[1,] 168 174

[2,] 132 195

[3,] 167 204

[4,] 162 212

[5,] 143 194

[6,] 153 174

 

# Heterogeneity G-test on the matrix of counts
G.test(x=Data.matrix)

 

G-test

G = 6.7504, df = 5, p-value = 0.2399

 

 

Pooled G-test

 

# Pooled G-test: sum the R and L counts over all rows, then test the two
#   totals against expected proportions of 0.5 and 0.5.
Total.R = sum(Data[["R"]])
Total.L = sum(Data[["L"]])

observed = c(Total.R, Total.L)
expected = rep(0.5, times=2)

G.test(x=observed, p=expected)

   

G-test for given probabilities

G = 25.0668, df = 1, p-value = 5.538e-07

 

 

Total G-test

 

# Total G-test: the total G and total degrees of freedom are the sums of
#   the G and df columns of Data.
Total.G  = sum(Data[["G"]])
Total.df = sum(Data[["df"]])

Total.G

 

[1] 31.81721

 

 

Total.df                                        # Total degrees of freedom

                              

[1] 6

 

 

# Upper-tail chi-square probability of the total G at the total df
pchisq(q=Total.G, df=Total.df, lower.tail=FALSE)

         

[1] 1.768815e-05

 

#     #     #

 

 

Example

Repeated G–tests of goodness-of-fit example

 

### --------------------------------------------------------------
### Drosophila example, Repeated G–tests of goodness-of-fit,
###      p. 93
### --------------------------------------------------------------

### Counts in columns D and S for each trial, entered as an inline
###   whitespace-delimited table; trial labels are quoted because they
###   contain a space.
Input =("
 Trial       D    S
 'Trial 1'   296  366    
 'Trial 2'    78   72     
 'Trial 3'   417  467
")

### Parse the inline table into a data frame.  Passing the string through
###   text= lets read.table open and close its own connection, so no
###   text connection is left open afterwards.
Data = read.table(text=Input, header=TRUE)

 

 

Individual G-tests

 

library(RVAideMemoire)

# Fun.G: G statistic of a goodness-of-fit G-test for one row of counts.
#   Q is a named vector holding elements "D" and "S"; the two counts are
#   tested against expected proportions of 0.5 and 0.5.
#   Fun.df and Fun.p return the df and p-value of the same test.
Fun.G = function (Q){
  counts = c(Q["D"], Q["S"])
  fit = G.test(x=counts, p=c(0.5, 0.5))
  fit$statistic
}

# Fun.df: degrees of freedom of a goodness-of-fit G-test for one row.
#   Q is a named vector holding elements "D" and "S"; the two counts are
#   tested against expected proportions of 0.5 and 0.5.
Fun.df = function (Q){
  counts = c(Q["D"], Q["S"])
  fit = G.test(x=counts, p=c(0.5, 0.5))
  fit$parameter
}

# Fun.p: p-value of a goodness-of-fit G-test for one row of counts.
#   Q is a named vector holding elements "D" and "S"; the two counts are
#   tested against expected proportions of 0.5 and 0.5.
Fun.p = function (Q){
  counts = c(Q["D"], Q["S"])
  fit = G.test(x=counts, p=c(0.5, 0.5))
  fit$p.value
}


library(dplyr)

# Add, row by row, the G statistic, degrees of freedom, and p-value of
#   the individual G-tests.  cbind(D, S) builds the matrix of counts that
#   apply() walks over; its column names "D" and "S" are what Fun.G,
#   Fun.df and Fun.p index.
Data = Data %>%
  mutate(G       = apply(cbind(D, S), 1, Fun.G),
         df      = apply(cbind(D, S), 1, Fun.df),
         p.Value = apply(cbind(D, S), 1, Fun.p))

Data

 

    Trial   D   S        G df    p.Value

1 Trial 1 296 366 7.415668  1 0.00646583

2 Trial 2  78  72 0.240064  1 0.62415986

3 Trial 3 417 467 2.829564  1 0.09254347

 

 

Heterogeneity G-test

 

# The heterogeneity G-test is run on a matrix of the D and S counts,
#   one row per trial, so pull those two columns out as a matrix.
Data.matrix = as.matrix(Data[, c("D", "S")])

Data.matrix

 

       D   S

[1,] 296 366

[2,]  78  72

[3,] 417 467

 

 

# Heterogeneity G-test on the matrix of counts
G.test(x=Data.matrix)

 

G-test

G = 2.8168, df = 2, p-value = 0.2445

 

 

Pooled G-test

 

# Pooled G-test: sum the D and S counts over all rows, then test the two
#   totals against expected proportions of 0.5 and 0.5.
Total.D = sum(Data[["D"]])
Total.S = sum(Data[["S"]])

observed = c(Total.D, Total.S)
expected = rep(0.5, times=2)

G.test(x=observed, p=expected)

   

G-test for given probabilities

G = 7.6685, df = 1, p-value = 0.005619

 

 

Total G-test

 

# Total G-test: the total G and total degrees of freedom are the sums of
#   the G and df columns of Data, so the degrees of freedom follow the
#   number of rows instead of being typed in by hand.
Total.G  = sum(Data$G)
Total.df = sum(Data$df)

Total.G                                         # Total

 

[1] 10.4853

 

 

Total.df                                        # Total degrees of freedom

                        

[1] 3

 

 

# Upper-tail chi-square probability of the total G at the total df
pchisq(q=Total.G, df=Total.df, lower.tail=FALSE)

         

[1] 0.01486097

 

#     #     #

 

 

Similar tests

See the Handbook for information on these topics.