Multiple responses are for the birds

The rimu package handles multiple-response data, a generalisation of factor data. With factor data, there is a defined set of categories and each observation comes from one category. With multiple-response data, there is a defined set of categories, but each observation could come from multiple categories. We provide two classes: mr for multiple-response presence/absence data, and ms for scored or ranked multiple-responses where each category gets a non-zero score or rank.

The birds dataset is a small subset of data from the Great Backyard Bird Count, in the US and Canada. We have counts of 12 birds by US county and Canadian province. The twelve birds are

data(birds)
names(birds)[1:12]
##  [1] "Phalaenoptilus nuttallii"      "Fregata magnificens"          
##  [3] "Melanerpes lewis"              "Melospiza georgiana"          
##  [5] "Rallus limicola"               "Myioborus pictus"             
##  [7] "Poecile gambeli"               "Aythya collaris"              
##  [9] "Xanthocephalus xanthocephalus" "Gracula religiosa"            
## [11] "Icterus parisorum"             "Coccyzus erythropthalmus"

There’s a thirteenth column giving the location name.

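These birds are perhaps more familiar by their common names: common poorwill, magnificent frigatebird, Lewis's woodpecker, swamp sparrow, Virginia rail, painted redstart, mountain chickadee, ring-necked duck, yellow-headed blackbird, common hill myna, Scott's oriole, and black-billed cuckoo.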

First, let’s put them into the data structures

bird_count <- as.ms(birds[,-13],na.rm=TRUE)
bird_presence <- as.mr(bird_count)

The bird counts will print like a sparse matrix

head(bird_count)
##      Phalaenoptilus nuttallii Fregata magnificens Melanerpes lewis
## [1,] .                        .                   .               
## [2,] .                        .                   .               
## [3,] .                        .                   .               
## [4,] .                        .                   .               
## [5,] .                        .                   .               
## [6,] .                        .                   .               
##      Melospiza georgiana Rallus limicola Myioborus pictus Poecile gambeli
## [1,] .                   .               .                .              
## [2,] .                   .               .                .              
## [3,] 5                   .               .                .              
## [4,] .                   .               .                30             
## [5,] .                   .               .                .              
## [6,] 1                   .               .                .              
##      Aythya collaris Xanthocephalus xanthocephalus Gracula religiosa
## [1,] .               .                             .                
## [2,] 1               .                             .                
## [3,] 4               .                             .                
## [4,] 10              .                             .                
## [5,] .               .                             .                
## [6,] 1               .                             .                
##      Icterus parisorum Coccyzus erythropthalmus
## [1,] .                 .                       
## [2,] .                 .                       
## [3,] .                 .                       
## [4,] .                 .                       
## [5,] .                 .                       
## [6,] .                 .

but the bird presence/absence data has a more compact character form

head(bird_presence)
## [1] ""                                    "Aythya collaris"                    
## [3] "Melospiza georgiana+Aythya collaris" "Poecile gambeli+Aythya collaris"    
## [5] ""                                    "Melospiza georgiana+Aythya collaris"

What birds are most often present?

mtable(bird_presence)
##  Phalaenoptilus nuttallii Fregata magnificens Melanerpes lewis
##     9                       16                  87            
##  Melospiza georgiana Rallus limicola Myioborus pictus Poecile gambeli
##   876                 121               4              317           
##  Aythya collaris Xanthocephalus xanthocephalus Gracula religiosa
##  1090              80                             1             
##  Icterus parisorum Coccyzus erythropthalmus
##     8                 1

And what birds tend to go together? We can draw an UpSet chart:

plot(bird_presence,nsets=12)

That’s all a bit clumsy because of the long names, but you can see, for example, that the swamp sparrow and ring-necked duck tend to co-occur. Let’s recode to shorter names.

bird_presence<-mr_recode(bird_presence, 
  poorwill="Phalaenoptilus nuttallii",
  frigatebird="Fregata magnificens",       
  woodpecker ="Melanerpes lewis",          
  sparrow="Melospiza georgiana",   
  rail="Rallus limicola",      
  redstart="Myioborus pictus",          
  chickadee="Poecile gambeli",            
  duck="Aythya collaris",
  yellowhead="Xanthocephalus xanthocephalus",
  myna="Dracula religiosa",           
  oriole="Icterus parisorum",      
  cuckoo="Coccyzus erythropthalmus")
## Error in mr_recode.default(bird_presence, poorwill = "Phalaenoptilus nuttallii", : non-existent levels Dracula religiosa

Oops.

bird_presence<-mr_recode(bird_presence, 
  poorwill="Phalaenoptilus nuttallii",
  frigatebird="Fregata magnificens",       
  woodpecker ="Melanerpes lewis",          
  sparrow="Melospiza georgiana",   
  rail="Rallus limicola",      
  redstart="Myioborus pictus",          
  chickadee="Poecile gambeli",            
  duck="Aythya collaris",
  yellowhead="Xanthocephalus xanthocephalus",
  myna="Gracula religiosa",           
  oriole="Icterus parisorum",      
  cuckoo="Coccyzus erythropthalmus")

Now try again:

mtable(bird_presence)
##  poorwill frigatebird woodpecker sparrow rail redstart chickadee duck
##     9       16          87        876     121    4      317      1090
##  yellowhead myna oriole cuckoo
##    80          1    8      1
mtable(bird_presence,bird_presence)
##             poorwill frigatebird woodpecker sparrow rail redstart chickadee
## poorwill       9        0           2          0       3    0        5     
## frigatebird    0       16           0         12       8    0        0     
## woodpecker     2        0          87         13      29    3       72     
## sparrow        0       12          13        876      72    4       34     
## rail           3        8          29         72     121    4       52     
## redstart       0        0           3          4       4    4        3     
## chickadee      5        0          72         34      52    3      317     
## duck           8       13          70        593     114    4      188     
## yellowhead     2        3          27         22      22    3       43     
## myna           0        1           0          1       1    0        0     
## oriole         0        0           4          5       4    3        4     
## cuckoo         0        0           0          1       0    0        0     
##             duck yellowhead myna oriole cuckoo
## poorwill       8    2          0    0      0  
## frigatebird   13    3          1    0      0  
## woodpecker    70   27          0    4      0  
## sparrow      593   22          1    5      1  
## rail         114   22          1    4      0  
## redstart       4    3          0    3      0  
## chickadee    188   43          0    4      0  
## duck        1090   73          1    7      1  
## yellowhead    73   80          0    5      0  
## myna           1    0          1    0      0  
## oriole         7    5          0    8      0  
## cuckoo         1    0          0    0      1
plot(bird_presence, nsets=12,nint=30)

The default image plot is of the table of the variable by itself and shows the number of co-occurrences. With type="conditional", the plot shows the proportion of each bird (on the y-axis) given that a particular bird (on the x-axis) is present.

image(bird_presence)

image(bird_presence, type="conditional")

We might want to focus on just the more commonly observed birds

common_birds<-mr_lump(bird_presence,n=4)
mtable(common_birds)
##  sparrow rail chickadee duck Other
##   876     121  317      1090  163
mtable(common_birds,common_birds)
##           sparrow rail chickadee duck Other
## sparrow    876      72   34       593   44 
## rail        72     121   52       114   48 
## chickadee   34      52  317       188   97 
## duck       593     114  188      1090  135 
## Other       44      48   97       135  163
plot(common_birds)

Or consider just the rare and interesting ones

rare_birds<-mr_lump(bird_presence,n=-5,other_level="Common")
mtable(rare_birds)
##  poorwill redstart myna oriole cuckoo Common
##     9        4        1    8      1   1513
mtable(rare_birds,rare_birds)
##          poorwill redstart myna oriole cuckoo Common
## poorwill    9        0        0    0      0      9  
## redstart    0        4        0    3      0      4  
## myna        0        0        1    0      0      1  
## oriole      0        3        0    8      0      7  
## cuckoo      0        0        0    0      1      1  
## Common      9        4        1    7      1   1513
plot(rare_birds,nsets=6)

plot(mr_drop(rare_birds,"Common"),nsets=5)