Load packages: library(igraph)
library(igraphdata)
Working with network data in igraph is fairly straightforward. For this tutorial, we'll start with loading some foodweb networks from the igraphdata
package.
# Load the food web collection that ships with igraphdata.
data("foodwebs", package = "igraphdata")
# Print the whole collection: it holds several networks, which are split
# into individual network objects afterwards.
print(foodwebs)
$ChesLower
IGRAPH e548ef8 DNW- 37 178 -- Lower Chesapeake Bay in Summer
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from e548ef8 (vertex names):
[1] Input ->Net Phytoplankton Input ->Picoplankton
[3] Input ->Microphytobenthos Input ->SAV
[5] Input ->Oysters Input ->Blue Crab
[7] Input ->Herrings and Shads Input ->White Perch
[9] Input ->Spot Input ->American eel
[11] Input ->Catfish Input ->DOC
[13] Input ->Sediment POC Oysters ->Output
+ ... omitted several edges
$ChesMiddle
IGRAPH 00f5049 DNW- 37 209 -- Middle Chesapeake Bay in Summer
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 00f5049 (vertex names):
[1] Input ->Net Phytoplankton Input ->Picoplankton
[3] Input ->Microphytobenthos Input ->SAV
[5] Input ->Deposit Feeding Benthos Input ->Suspension Feeding Benthos
[7] Input ->Oysters Input ->Blue Crab
[9] Input ->Herrings and Shads Input ->American eel
[11] Input ->Sediment POC Oysters ->Output
[13] Blue Crab->Output Menhaden ->Output
+ ... omitted several edges
$ChesUpper
IGRAPH babc2a0 DNW- 37 215 -- Upper Chesapeake Bay in Summer
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from babc2a0 (vertex names):
[1] Input ->Net Phytoplankton Input ->Picoplankton
[3] Input ->Microphytobenthos Input ->SAV
[5] Input ->Oysters Input ->Blue Crab
[7] Input ->American Eel Input ->DOC
[9] Input ->Sediment POC Input ->POC
[11] Oysters ->Output Blue Crab ->Output
[13] Menhaden ->Output Bay anchovy->Output
+ ... omitted several edges
$Chesapeake
IGRAPH 795200f DNW- 39 177 -- Chesapeake Bay Mesohaline Network
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 795200f (vertex names):
[1] Input ->phytoplankton
[2] Input ->benthic diatoms
[3] Input ->dissolved organic carbon
[4] Input ->suspended particulate org
[5] zooplankton ->Output
[6] mya arenaria ->Output
[7] oysters ->Output
+ ... omitted several edges
$CrystalC
IGRAPH 351cc8c DNW- 24 125 -- Crystal River Creek (Control)
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 351cc8c (vertex names):
[1] Input ->macrophytes Input ->bay anchovy
[3] Input ->needlefish Input ->gulf killifish
[5] Input ->pinfish zooplankton ->Output
[7] benthic invertebrates->Output blacktip shark ->Output
[9] stingray ->Output striped anchovy ->Output
[11] bay anchovy ->Output needlefish ->Output
[13] sheepshead killifish ->Output goldspotted killifish->Output
+ ... omitted several edges
$CrystalD
IGRAPH 707bdd2 DNW- 24 100 -- Crystal River Creek (Delta Temp)
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 707bdd2 (vertex names):
[1] Input ->macrophytes Input ->pinfish
[3] macrophytes ->Output zooplankton ->Output
[5] benthic invertebrates->Output bay anchovy ->Output
[7] catfish ->Output needlefish ->Output
[9] goldspotted killifish->Output gulf killifish ->Output
[11] longnosed killifish ->Output molly ->Output
[13] silverside ->Output moharra ->Output
+ ... omitted several edges
$Maspalomas
IGRAPH abb9844 DNW- 24 82 -- Charca de Maspalomas
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from abb9844 (vertex names):
[1] Input ->Cyanobacteria Input ->Eukaryotic Phyto
[3] Input ->Chara globularis Input ->Ruppia Maritima
[5] Input ->Cladophora Input ->Periphyton
[7] Gallinula chloropus->Output DOC ->Output
[9] Sedimented POC ->Output Cyanobacteria ->Respiration
[11] Eukaryotic Phyto ->Respiration Chara globularis ->Respiration
[13] Ruppia Maritima ->Respiration Cladophora ->Respiration
+ ... omitted several edges
$Michigan
IGRAPH f6c2f6e DNW- 39 221 -- Lake Michigan Control network
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from f6c2f6e (vertex names):
[1] Input ->Flagellates Input ->Blue-greenGree
[3] Input ->Diatoms Input ->Bythotrephes
[5] Input ->Zebra mussels Bythotrephes ->Output
[7] Zebra mussels ->Output Bloater ->Output
[9] Rainbow smelt ->Output Slimy sculpin ->Output
[11] Deepwater sculp->Output Lake Whitefish ->Output
[13] Yellow perch ->Output Burbot ->Output
+ ... omitted several edges
$Mondego
IGRAPH b6463ad DNW- 46 400 -- Mondego Estuary - Zostrea site
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from b6463ad (vertex names):
[1] Input ->Phytoplankton Input ->Enteromorpha sp
[3] Input ->Ulva lactuca Input ->Zostera
[5] Input ->Epiphytes Input ->Gracilaria
[7] Input ->Macrofauna predators Input ->Larus ridibundus
[9] Input ->Larus fuscus Detritus ->Output
[11] Phytoplankton ->Respiration Enteromorpha sp->Respiration
[13] Ulva lactuca ->Respiration Zostera ->Respiration
+ ... omitted several edges
$Narragan
IGRAPH 0a0c658 DNW- 35 220 -- Narragansett Bay Model
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 0a0c658 (vertex names):
[1] Input ->Benthic Alage Input ->Phytoplankton
[3] Input ->Detritus Bluefish ->Output
[5] Striped Bass ->Output Winter Flounder->Output
[7] Windowpane ->Output Scup ->Output
[9] Tautog ->Output Dogfish ->Output
[11] Skates ->Output Longfin Squid ->Output
[13] Butterfish ->Output Menhaden ->Output
+ ... omitted several edges
$Rhode
IGRAPH 87ad3f1 DNW- 20 53 -- Rhode River Watershed - Water Budget
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 87ad3f1 (vertex names):
[1] Input->crop land Input->pasture land
[3] Input->upland forest Input->riparian forest adj. crop
[5] Input->riparian forest adj. past Input->riparian forest adj. upla
[7] Input->flooded swamp forest Input->herbaceous wetland
[9] Input->floodplain forest Input->low marsh adj. north fork
[11] Input->low marsh adj. main fork Input->muddy creek
[13] Input->mud flat Input->high marsh
+ ... omitted several edges
$StMarks
IGRAPH 09c5325 DNW- 54 356 -- St. Marks River (Florida) Flow network
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 09c5325 (vertex names):
[1] Input->Phytoplankton Input->Halodule
[3] Input->Micro-epiphytes Input->Macro-epiphytes
[5] Input->Benthic algae Input->Zooplankton
[7] Input->Epiphyte-graz amphipods Input->suspension-feed molluscs
[9] Input->Suspension-feed polychts Input->Benthic bact
[11] Input->Microfauna Input->Deposit feed amphipods
[13] Input->Herbivorous shrimp Input->Deposit-feed gastropod
+ ... omitted several edges
$baydry
IGRAPH 273e289 DNW- 128 2137 -- Florida Bay Trophic Exchange Matrix, dry season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 273e289 (vertex names):
[1] Input->2um Spherical Phytoplankt Input->Synedococcus
[3] Input->Oscillatoria Input->Small Diatoms (<20um)
[5] Input->Big Diatoms (>20um) Input->Dinoflagellates
[7] Input->Other Phytoplankton Input->Benthic Phytoplankton
[9] Input->Thalassia Input->Halodule
[11] Input->Syringodium Input->Roots
[13] Input->Drift Algae Input->Epiphytes
+ ... omitted several edges
$baywet
IGRAPH ccb5ef1 DNW- 128 2106 -- Florida Bay Trophic Exchange Matrix, wet season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from ccb5ef1 (vertex names):
[1] Input->2um Spherical Phytoplankt Input->Synedococcus
[3] Input->Oscillatoria Input->Small Diatoms (<20um)
[5] Input->Big Diatoms (>20um) Input->Dinoflagellates
[7] Input->Other Phytoplankton Input->Benthic Phytoplankton
[9] Input->Thalassia Input->Halodule
[11] Input->Syringodium Input->Roots
[13] Input->Drift Algae Input->Epiphytes
+ ... omitted several edges
$cypdry
IGRAPH 06b3144 DNW- 71 640 -- Cypress Dry Season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 06b3144 (vertex names):
[1] Input->Living POC Input->Phytoplankton
[3] Input->Float. vegetation Input->Periphyton/Macroalgae
[5] Input->Macrophytes Input->Epiphytes
[7] Input->Understory Input->Vine Leaves
[9] Input->Hardwoods Leaves Input->Cypress Leaves
[11] Input->Cypress Wood Input->HW Wood
[13] Input->Roots Input->Egrets
+ ... omitted several edges
$cypwet
IGRAPH e146917 DNW- 71 631 -- Cypress Wet Season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from e146917 (vertex names):
[1] Input->Living POC Input->Phytoplankton Input->Float Veg.
[4] Input->Periphyton Input->Macrophytes Input->Epiphytes
[7] Input->Understory Input->Vine L Input->Hardwood L
[10] Input->Cypress L Input->Cypress W Input->Hardwood W
[13] Input->Roots Input->Egrets Input->GB Heron
[16] Input->Other Herons Input->Wood stork Input->White ibis
[19] Input->Refractory Det. Input->Liable Det.
+ ... omitted several edges
$gramdry
IGRAPH 133721a DNW- 69 915 -- Everglades Graminoids - Dry Season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 133721a (vertex names):
[1] Input ->Periphyton Input ->Macrophytes
[3] Input ->Utricularia Input ->Floating Veg.
[5] Input ->Lizards Apple snail ->Output
[7] Freshwater Prawn ->Output Mesoinverts ->Output
[9] Other Macroinverts ->Output Large Aquatic Insects->Output
[11] Gar ->Output Shiners & Minnows ->Output
[13] Chubsuckers ->Output Catfish ->Output
+ ... omitted several edges
$gramwet
IGRAPH 50c102b DNW- 69 916 -- Everglades Graminoids - Wet Season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 50c102b (vertex names):
[1] Input ->Periphyton Input ->Macrophytes
[3] Input ->Utricularia Input ->Floating Veg.
[5] Input ->Lizards Living POC ->Output
[7] Apple snail ->Output Freshwater Prawn ->Output
[9] Mesoinverts ->Output Other Macroinverts ->Output
[11] Large Aquatic Insects->Output Gar ->Output
[13] Shiners & Minnows ->Output Chubsuckers ->Output
+ ... omitted several edges
$mangdry
IGRAPH 2ffd52d DNW- 97 1491 -- Mangrove Estuary, Dry Season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from 2ffd52d (vertex names):
[1] Input->PHY Input->OTH. PP Input->LEAF Input->WOOD
[5] Input->ROOT Input->MERO Input->L & G Input->PELC
[9] Input->CORM Input->BH & E Input->SE & E Input->IBIS
[13] Input->DUCK1 Input->DUCK2 Input->DUCK3 Input->VULT
[17] Input->K & H Input->MRAPT Input->GUIF Input->SSBIRDS
[21] Input->G & T Input->C & C Input->OWLS Input->WOODP
[25] Input->PASSOMN Input->PASSPERD Input->POC Input->DOC
+ ... omitted several edges
$mangwet
IGRAPH d3c50a5 DNW- 97 1492 -- Mangrove Estuary, Wet Season
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from d3c50a5 (vertex names):
[1] Input->PHY Input->OTH. PP Input->LEAF Input->WOOD
[5] Input->ROOT Input->MERO Input->L & G Input->PELC
[9] Input->CORM Input->BH & E Input->SE & E Input->IBIS
[13] Input->DUCK1 Input->DUCK2 Input->DUCK3 Input->VULT
[17] Input->K & H Input->MRAPT Input->GUIF Input->SSBIRDS
[21] Input->G & T Input->C & C Input->OWLS Input->WOODP
[25] Input->PASSOMN Input->PASSPERD Input->POC Input->DOC
+ ... omitted several edges
# Pull the three Chesapeake Bay summer webs out of the list by name.
ChesLower <- foodwebs[["ChesLower"]]
ChesMiddle <- foodwebs[["ChesMiddle"]]
ChesUpper <- foodwebs[["ChesUpper"]]
Let's check out our igraph object: ChesUpper
. What can we learn about the network from this summary? Is the network directed or undirected? What are the edge and vertex attributes?
IGRAPH babc2a0 DNW- 37 215 -- Upper Chesapeake Bay in Summer
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), weight (e/n)
+ edges from babc2a0 (vertex names):
[1] Input ->Net Phytoplankton Input ->Picoplankton
[3] Input ->Microphytobenthos Input ->SAV
[5] Input ->Oysters Input ->Blue Crab
[7] Input ->American Eel Input ->DOC
[9] Input ->Sediment POC Input ->POC
[11] Oysters ->Output Blue Crab ->Output
[13] Menhaden ->Output Bay anchovy->Output
+ ... omitted several edges
Let's examine the node attributes. V(ChesUpper)
lists all the vertices in the graph.
+ 37/37 vertices, named, from babc2a0:
[1] Net Phytoplankton Picoplankton
[3] Free Bacteria Particle Attached Bacteria
[5] Heteroflagellates Ciliates
[7] Rotifers Meroplankton
[9] Mesozooplankton Ctenophores
[11] Chrysaora Microphytobenthos
[13] SAV Benthic Bacteria
[15] Meiofauna Deposit Feeding Benthos
[17] Suspension Feeding Benthos Oysters
[19] Blue Crab Menhaden
+ ... omitted several vertices
V(ChesUpper)$name
lists all the name
attribute values for the vertices in the graph. Note the difference between the output of these two commands.
[1] "Net Phytoplankton" "Picoplankton"
[3] "Free Bacteria" "Particle Attached Bacteria"
[5] "Heteroflagellates" "Ciliates"
[7] "Rotifers" "Meroplankton"
[9] "Mesozooplankton" "Ctenophores"
[11] "Chrysaora" "Microphytobenthos"
[13] "SAV" "Benthic Bacteria"
[15] "Meiofauna" "Deposit Feeding Benthos"
[17] "Suspension Feeding Benthos" "Oysters"
[19] "Blue Crab" "Menhaden"
[21] "Bay anchovy" "Herrings and Shads"
[23] "White Perch" "Spot"
[25] "Croaker" "Hogchoker"
[27] "American Eel" "Catfish"
[29] "Striped Bass" "Bluefish"
[31] "Weakfish" "DOC"
[33] "Sediment POC" "POC"
[35] "Input" "Output"
[37] "Respiration"
How would you look at the values of the Biomass
attribute? This graph currently has one edge attribute: "weight". How might you examine this attribute?
V(ChesUpper)$Biomass
[1] 1356.000 239.000 649.000 36.000 30.000 66.000
[7] 14.000 3.200 282.000 17.000 0.001 293.000
[13] 2086.000 30.000 700.000 2368.000 27232.000 0.001
[19] 610.000 2136.000 287.000 212.000 282.000 195.000
[25] 50.000 100.000 35.000 450.000 172.000 68.000
[31] 67.000 12504.000 201670.000 5249.000 0.000 0.000
[37] 0.000
# Edge attributes are read through E(), just as V() reads vertex attributes.
E(ChesUpper)$weight
[1] 9.105056e+04 2.267064e+04 2.156480e+04 1.535296e+03 1.000000e-07
[6] 1.196000e+01 2.024000e+00 8.800000e+01 2.428800e+04 9.731000e+03
[11] 1.000000e-07 1.674400e+02 4.416000e+03 3.680000e+02 1.288000e+01
[16] 1.656000e+01 1.159200e+01 2.208000e+01 1.932000e+01 4.232000e+01
[21] 0.000000e+00 0.000000e+00 0.000000e+00 2.493200e+04 5.520000e+03
[26] 4.239268e+04 3.444480e+03 5.987544e+03 8.804400e+03 3.864000e+02
[31] 1.477520e+02 3.126252e+03 1.107680e+02 0.000000e+00 5.391200e+03
[36] 0.000000e+00 7.286400e+03 4.508000e+03 4.574976e+03 3.131680e+04
[41] 0.000000e+00 1.661520e+02 1.218374e+04 3.193688e+03 2.824400e+02
[46] 7.516400e+01 2.598080e+02 6.660800e+01 7.084000e+02 1.444400e+01
[51] 6.541200e+01 2.990920e+02 3.245760e+02 2.928360e+02 0.000000e+00
[56] 0.000000e+00 0.000000e+00 1.518000e+04 9.660000e+02 2.409480e+02
[61] 1.097818e+04 2.054378e+04 1.768608e+04 4.188760e+02 5.234800e+01
[66] 5.234800e+01 3.036000e+03 1.288000e+02 4.912800e+01 2.360904e+03
[71] 5.135992e+03 5.151816e+03 1.288000e+03 2.602680e+04 6.072000e+03
[76] 7.176000e+01 7.176000e+01 2.054360e+03 2.645000e+02 2.645000e+02
[81] 6.072000e+03 6.440000e+01 6.951428e+03 6.951428e+03 1.288000e+02
[86] 1.720400e+02 8.263164e+03 3.841920e+02 3.514400e+01 6.286176e+03
[91] 6.286176e+03 4.876000e+00 1.180360e+02 7.682000e+01 6.992000e+00
[96] 3.473920e+02 3.473920e+02 1.067200e+01 7.682000e+01 6.992000e+00
[101] 1.246600e+02 1.246600e+02 7.083080e+02 5.105080e+02 9.825600e+02
[106] 6.915180e+03 6.319480e+02 9.384000e+00 5.362496e+03 5.362496e+03
[111] 9.200000e-02 2.104960e+02 2.104960e+02 9.016000e+03 3.812480e+03
[116] 1.672560e+03 1.672560e+03 1.535296e+03 1.803200e+03 7.624960e+02
[121] 5.531040e+02 3.812480e+03 6.734400e+01 1.867600e+01 6.458400e+01
[126] 1.656000e+01 9.544356e+03 6.734400e+01 3.735200e+01 2.583360e+02
[131] 6.624000e+01 5.520000e+02 1.775600e+01 1.978000e+01 3.348800e+01
[136] 8.660420e+03 9.622280e+02 4.040640e+02 5.602800e+01 2.583360e+02
[141] 6.624000e+01 6.900000e+02 1.775600e+01 1.141720e+02 1.978000e+01
[146] 3.348800e+01 3.487122e+04 3.487122e+04 1.978000e+01 1.660600e+02
[151] 1.660600e+02 1.978000e+02 8.887200e+01 6.706800e+01 1.348904e+03
[156] 1.348904e+03 1.067200e+01 6.734400e+01 4.673600e+01 3.266000e+01
[161] 5.934000e+01 1.332160e+02 1.592520e+02 1.806328e+03 1.806328e+03
[166] 3.956000e+01 1.110440e+02 2.097600e+01 1.241080e+02 1.241080e+02
[171] 1.186800e+01 2.097600e+01 3.293600e+01 3.293600e+01 1.978000e+01
[176] 8.887200e+01 6.283600e+01 1.072720e+02 1.072720e+02 2.217200e+01
[181] 3.836400e+01 3.836400e+01 7.912000e+00 2.097600e+01 3.213560e+02
[186] 3.213560e+02 5.704000e+00 5.704000e+00 4.038800e+01 4.038800e+01
[191] 3.726000e+01 3.726000e+01 5.032400e+01 5.032400e+01 4.213600e+01
[196] 4.213600e+01 7.463500e+04 1.040520e+04 7.212800e+03 6.862464e+03
[201] 6.734400e+01 1.628400e+01 8.696760e+03 1.304514e+04 6.027840e+03
[206] 2.456400e+01 1.180452e+03 7.498497e+04 9.825600e+02 2.305520e+02
[211] 2.106800e+01 1.867600e+01 6.458400e+01 1.656000e+01 1.380000e+02
You can also add additional node or edge attribute data to the graph object. First, we make vectors of the attribute data based on the names. Let's make one vector defining whether the node is a crab.
# Flag the crab: "Y" for the "Blue Crab" node, "N" for every other node.
CrabYN <- ifelse(V(ChesUpper)$name == "Blue Crab", "Y", "N")
# Classify each node by organism type from its name. The %like% operator is
# not provided by igraph or igraphdata (the packages loaded at the top), so
# base R's grepl() performs the same substring test. The match is
# case-sensitive: only names containing lowercase "plankton" count as
# plankton, and "Bacteria" is checked only for the remaining names.
OrgType <- V(ChesUpper)$name
OrgType <- ifelse(
  grepl("plankton", OrgType, fixed = TRUE), "Plankton",
  ifelse(grepl("Bacteria", OrgType, fixed = TRUE), "Bacteria", "Other")
)
Next, we can assign these attributes to the nodes in our graph. There are two ways to do this in igraph. You already know one of the ways from the lesson on dataframe manipulation. Assign the OrgType
vector as a node attribute in our ChesUpper
graph.
# Attach OrgType to the vertices by assigning through `$` on V(ChesUpper).
V(ChesUpper)$OrgType <- OrgType
# Read the attribute back to confirm the assignment.
V(ChesUpper)$OrgType
[1] "Plankton" "Plankton" "Bacteria" "Bacteria" "Other" "Other"
[7] "Other" "Plankton" "Plankton" "Other" "Other" "Other"
[13] "Other" "Bacteria" "Other" "Other" "Other" "Other"
[19] "Other" "Other" "Other" "Other" "Other" "Other"
[25] "Other" "Other" "Other" "Other" "Other" "Other"
[31] "Other" "Other" "Other" "Other" "Other" "Other"
[37] "Other"
igraph also has a built in function to set vertex and edge attributes. Use the function set.vertex.attribute()
to create a new vertex attribute named CrabYN
using the CrabYN
vector.
# Same idea through igraph's setter function; its result is assigned back
# to ChesUpper so the graph keeps the new attribute.
ChesUpper <- set.vertex.attribute(
  graph = ChesUpper,
  name = "CrabYN",
  value = CrabYN
)
V(ChesUpper)$CrabYN
[1] "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "Y"
[20] "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N" "N"
Now check out our graph data again: ChesUpper
. Note that the new node attributes have been appended.
IGRAPH babc2a0 DNW- 37 215 -- Upper Chesapeake Bay in Summer
+ attr: Citation (g/c), Author (g/c), URL (g/c), name (g/c), name
| (v/c), ECO (v/n), Biomass (v/n), OrgType (v/c), CrabYN (v/c), weight
| (e/n)
+ edges from babc2a0 (vertex names):
[1] Input ->Net Phytoplankton Input ->Picoplankton
[3] Input ->Microphytobenthos Input ->SAV
[5] Input ->Oysters Input ->Blue Crab
[7] Input ->American Eel Input ->DOC
[9] Input ->Sediment POC Input ->POC
[11] Oysters ->Output Blue Crab ->Output
+ ... omitted several edges
When the igraph package is loaded, you can simply run the plot()
function on an igraph object and R will know what to do.
## Quick default plots of the three Chesapeake webs, one after another.
for (web in list(ChesLower, ChesMiddle, ChesUpper)) {
  plot(web)
}
The default graphical parameters do not look that great. For now, let's make the node labels and node size smaller and change the color. The default node size is 15.
# Shrink the nodes and their labels, and fill every node with one color.
plot(
  ChesUpper,
  vertex.color = "tomato",
  vertex.size = 10,
  vertex.label.cex = 0.5
)
Or turn the node labels completely off.
# vertex.label = NA switches the node labels off altogether.
plot(
  ChesUpper,
  vertex.color = "tomato",
  vertex.size = 20,
  vertex.label = NA
)
igraph has many different layouts for network visualization. Let's try out some different layouts.
# Draw six layouts in a 3 x 2 grid with tight margins. par() returns the
# settings it replaces, so keep them and restore both mfrow and mar at the
# end; resetting only mfrow would leave the narrow margins in force for
# every plot drawn afterwards.
old_par <- par(mfrow = c(3, 2), mar = c(0.2, 1, 1, 0.2))
plot(ChesUpper, vertex.size = 10, vertex.label.cex = 0.5, layout = layout.circle)
plot(ChesUpper, vertex.size = 10, vertex.label.cex = 0.5, layout = layout.star)
plot(ChesUpper, vertex.size = 10, vertex.label.cex = 0.5, layout = layout_as_tree)
plot(ChesUpper, vertex.size = 10, vertex.label.cex = 0.5, layout = layout.auto)
plot(ChesUpper, vertex.size = 10, vertex.label.cex = 0.5, layout = layout_nicely)
plot(ChesUpper, vertex.size = 10, vertex.label.cex = 0.5, layout = layout.grid)
par(old_par)
Network visualizations often use the Kamada-Kawai or Fruchterman-Reingold layouts.
## Side-by-side comparison of the two layouts we usually use.
par(mfrow = c(1, 2))
plot(
  ChesUpper,
  layout = layout.fruchterman.reingold,
  vertex.size = 10,
  vertex.label.cex = 0.5
)
plot(
  ChesUpper,
  layout = layout.kamada.kawai,
  vertex.size = 10,
  vertex.label.cex = 0.5
)
par(mfrow = c(1, 1)) # back to a single panel
Kamada-Kawai seems best suited to this network. Both layouts are force-directed graph drawing algorithms; it is worth reading a little more about that family of methods.
We can also assign vertex colors based on node attributes. Create a new node attribute of three different colors based on the OrgType
attribute. Then assign this node attribute to the vertex colors and plot the network.
# Map each organism type to a color: bacteria, plankton, then everything else.
org_type <- V(ChesUpper)$OrgType
V(ChesUpper)$OrgColor <- ifelse(
  org_type == "Bacteria", "tomato",
  ifelse(org_type == "Plankton", "royal blue", "dark grey")
)
# Color the nodes with the new attribute.
plot(
  ChesUpper,
  layout = layout.kamada.kawai,
  vertex.color = V(ChesUpper)$OrgColor,
  vertex.size = 6,
  vertex.label = NA
)
# unique() keeps first-appearance order for both vectors, so each legend
# label lines up with the color assigned to it.
legend(
  "bottomleft",
  legend = unique(org_type),
  col = unique(V(ChesUpper)$OrgColor),
  pch = 19,
  cex = 0.7
)
We can also alter the arrow size, label font and many other plotting parameters to make a more interpretable graph.
# Restyle nodes, frames, arrows and edges. The label font and label color
# only matter once labels are shown; vertex.label = NA hides them here.
plot(
  ChesUpper,
  layout = layout.kamada.kawai,
  vertex.size = 8,
  vertex.color = "turquoise",
  vertex.frame.color = "corn silk",
  vertex.label = NA,
  vertex.label.family = "Helvetica",
  vertex.label.color = "goldenrod",
  edge.arrow.size = 0.1,
  edge.color = "light grey"
)
In the next section of the lesson, we will explore a dataset of scene interactions from the 1981 Indiana Jones movie, Raiders of the Lost Ark. These data are drawn from the MovieGalaxies database. The edges in this network represent the interactions between characters in this movie.
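The first step is to load the data: LostArk <- read.csv("filepath", header=TRUE, row.names=1)
. You can get the data here. Note that the data frame must be named LostArk, because that is the name every later step uses.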
Take a look at the data: str(LostArk)
'data.frame': 134 obs. of 3 variables:
$ ego : Factor w/ 27 levels "ANGELA","BETSY",..: 2 2 2 2 2 2 2 2 2 2 ...
$ alter : Factor w/ 30 levels "ANGELA","CHARLES",..: 18 17 1 11 22 27 5 14 29 6 ...
$ weight: int 1 1 1 1 1 1 2 2 1 1 ...
Or just the dataframe LostArk
ego alter weight
1 BETSY MEPHISTO 1
2 BETSY MARCUS 1
3 BETSY ANGELA 1
4 BETSY GUTTERBUHG 1
5 BETSY POSTMAN 1
6 BETSY TEDDY 1
7 BETSY DASHIELL 2
8 BETSY KEZURE 2
9 BETSY VIRGIL 1
10 BETSY DEAN COVENTRY 1
11 BETSY INDIANA 10
12 BETSY SCRAGGY 8
13 BETSY CREW MEMBER 1
14 BETSY CLARE 7
15 BETSY PRIESTLY 1
16 BETSY JULIA 1
17 BETSY CHARLES 1
18 BETSY PORTER 1
19 BETSY REBECCA 1
20 BETSY SUN WU KUNG 1
21 MARCUS INDIANA 1
22 ANGELA INDIANA 1
23 ANGELA PRIESTLY 1
24 ANGELA POSTMAN 1
25 ANGELA TEDDY 1
26 ANGELA JULIA 1
27 ANGELA CHARLES 1
28 ANGELA REBECCA 1
29 ANGELA VIRGIL 1
30 ANGELA DEAN COVENTRY 1
31 YOUNG MAN ELDERLY WOMAN 1
32 YOUNG MAN INDIANA 1
33 YOUNG MAN OLD MAN 1
34 YOUNG MAN MACGOWAN 1
35 YOUNG MAN FIRST DRAFT 1
36 YOUNG MAN MAN 1
37 YOUNG MAN OLD WOMAN 1
38 YOUNG MAN WOMAN 1
39 TEDDY PRIESTLY 1
40 TEDDY POSTMAN 1
41 TEDDY INDIANA 1
42 TEDDY JULIA 1
43 TEDDY CHARLES 1
44 TEDDY REBECCA 1
45 TEDDY VIRGIL 1
46 TEDDY DEAN COVENTRY 1
47 VIRGIL INDIANA 1
48 VIRGIL PRIESTLY 1
49 VIRGIL POSTMAN 1
50 VIRGIL JULIA 1
51 VIRGIL CHARLES 1
52 VIRGIL REBECCA 1
53 VIRGIL DEAN COVENTRY 1
54 ELDERLY WOMAN MACGOWAN 1
55 ELDERLY WOMAN OLD MAN 1
56 ELDERLY WOMAN INDIANA 1
57 ELDERLY WOMAN WOMAN 1
58 ELDERLY WOMAN FIRST DRAFT 1
59 ELDERLY WOMAN OLD WOMAN 1
60 ELDERLY WOMAN MAN 1
61 SCRAGGY MEPHISTO 1
62 SCRAGGY INDIANA 13
63 SCRAGGY GUTTERBUHG 2
64 SCRAGGY CREW MEMBER 1
65 SCRAGGY CLARE 9
66 SCRAGGY KEZURE 3
67 SCRAGGY PORTER 1
68 SCRAGGY SUN WU KUNG 1
69 SCRAGGY DASHIELL 2
70 MACGOWAN VILLAGER 1
71 MACGOWAN INDIANA 3
72 MACGOWAN OLD MAN 1
73 MACGOWAN GALBRAITH 2
74 MACGOWAN WOMAN 1
75 MACGOWAN FIRST DRAFT 1
76 MACGOWAN OLD WOMAN 1
77 MACGOWAN MAN 1
78 OLD WOMAN INDIANA 1
79 OLD WOMAN OLD MAN 1
80 OLD WOMAN FIRST DRAFT 1
81 OLD WOMAN MAN 1
82 OLD WOMAN WOMAN 1
83 CREW MEMBER CLARE 1
84 FIRST DRAFT OLD MAN 1
85 FIRST DRAFT INDIANA 1
86 FIRST DRAFT WOMAN 1
87 FIRST DRAFT MAN 1
88 CHARLES INDIANA 1
89 CHARLES PRIESTLY 1
90 CHARLES POSTMAN 1
91 CHARLES JULIA 1
92 CHARLES REBECCA 1
93 CHARLES DEAN COVENTRY 1
94 REBECCA INDIANA 1
95 REBECCA PRIESTLY 1
96 REBECCA POSTMAN 1
97 REBECCA JULIA 1
98 REBECCA DEAN COVENTRY 1
99 JULIA INDIANA 1
100 JULIA PRIESTLY 1
101 JULIA POSTMAN 1
102 JULIA DEAN COVENTRY 1
103 SUN WU KUNG CLARE 1
104 SUN WU KUNG KEZURE 1
105 SUN WU KUNG INDIANA 1
106 INDIANA DRIVER 1
107 INDIANA VILLAGER 1
108 INDIANA DASHIELL 2
109 INDIANA GUTTERBUHG 3
110 INDIANA POSTMAN 1
111 INDIANA WOMAN 1
112 INDIANA KEZURE 4
113 INDIANA DEAN COVENTRY 1
114 INDIANA PRIESTLY 1
115 INDIANA MEPHISTO 2
116 INDIANA OLD MAN 1
117 INDIANA GALBRAITH 1
118 INDIANA CLARE 8
119 INDIANA PORTER 1
120 INDIANA MAN 1
121 DRIVER GUTTERBUHG 1
122 DASHIELL CLARE 1
123 DASHIELL GUTTERBUHG 1
124 GUTTERBUHG CLARE 1
125 GUTTERBUHG PORTER 1
126 POSTMAN PRIESTLY 1
127 POSTMAN DEAN COVENTRY 1
128 WOMAN OLD MAN 1
129 WOMAN MAN 1
130 KEZURE CLARE 2
131 DEAN COVENTRY PRIESTLY 1
132 MEPHISTO CLARE 1
133 OLD MAN MAN 1
134 CLARE PORTER 1
These data are in an edgelist format. In this format, the first column is the name of the ego node and the second column is the name of the alter. Any additional columns can contain edge attributes such as tie type, strength, value, etc. An edgelist can be converted into an igraph object using the graph.data.frame()
function.
# Build a graph from the edgelist: the first two columns name the two ends
# of each edge, and the remaining column (weight) becomes an edge attribute.
LostArkg <- graph.data.frame(d = LostArk)
# Open the help page to read more about what this function does.
help("graph.data.frame")
Check out a summary of our data: LostArkg
. Notice the network attributes, such as node names and edge weight. Is this network directed or undirected? How many nodes and edges are in the network?
IGRAPH af78149 DNW- 32 134 --
+ attr: name (v/c), weight (e/n)
+ edges from af78149 (vertex names):
[1] BETSY ->MEPHISTO BETSY ->MARCUS BETSY ->ANGELA
[4] BETSY ->GUTTERBUHG BETSY ->POSTMAN BETSY ->TEDDY
[7] BETSY ->DASHIELL BETSY ->KEZURE BETSY ->VIRGIL
[10] BETSY ->DEAN COVENTRY BETSY ->INDIANA BETSY ->SCRAGGY
[13] BETSY ->CREW MEMBER BETSY ->CLARE BETSY ->PRIESTLY
[16] BETSY ->JULIA BETSY ->CHARLES BETSY ->PORTER
[19] BETSY ->REBECCA BETSY ->SUN WU KUNG MARCUS->INDIANA
[22] ANGELA->INDIANA ANGELA->PRIESTLY ANGELA->POSTMAN
+ ... omitted several edges
LostArkg
IGRAPH af78149 DNW- 32 134 --
+ attr: name (v/c), weight (e/n)
+ edges from af78149 (vertex names):
[1] BETSY ->MEPHISTO BETSY ->MARCUS BETSY ->ANGELA
[4] BETSY ->GUTTERBUHG BETSY ->POSTMAN BETSY ->TEDDY
[7] BETSY ->DASHIELL BETSY ->KEZURE BETSY ->VIRGIL
[10] BETSY ->DEAN COVENTRY BETSY ->INDIANA BETSY ->SCRAGGY
[13] BETSY ->CREW MEMBER BETSY ->CLARE BETSY ->PRIESTLY
[16] BETSY ->JULIA BETSY ->CHARLES BETSY ->PORTER
[19] BETSY ->REBECCA BETSY ->SUN WU KUNG MARCUS->INDIANA
[22] ANGELA->INDIANA ANGELA->PRIESTLY ANGELA->POSTMAN
+ ... omitted several edges
And make our first network plot: plot(LostArkg)
Looks good, but we can do better.
Adjust the edge arrow size, vertex and label size, colors, or other plotting parameters to make a more legible graph.
Network analysis is a powerful tool not only for visualization, but also for understanding power relationships and flows between actors. igraph
includes functions for calculating network, dyadic, and node level statistics. First, let's calculate some node centrality metrics.
# Store two node-level centrality scores as vertex attributes.
V(LostArkg)$degree <- degree(graph = LostArkg)
V(LostArkg)$betweenness <- betweenness(graph = LostArkg)
Determine whether the default output from degree()
is indegree, outdegree, or all ties. Create additional node attributes for the missing centrality measures.
# degree() with its default mode, then each mode spelled out, so the default
# can be identified by comparing the stored attributes.
V(LostArkg)$degree <- degree(LostArkg)
V(LostArkg)$degreeAll <- degree(LostArkg, mode = "all")
V(LostArkg)$degreeIn <- degree(LostArkg, mode = "in")
V(LostArkg)$degreeOut <- degree(LostArkg, mode = "out")
Determine who has the highest betweenness. Use both graphical and statistical methods to identify this character. Does this fit with your understanding of the plot of the movie?
degree(LostArkg) #look at the degree centrality for each node
BETSY MARCUS ANGELA YOUNG MAN TEDDY
20 2 10 8 10
VIRGIL ELDERLY WOMAN SCRAGGY MACGOWAN OLD WOMAN
10 8 10 10 8
CREW MEMBER FIRST DRAFT CHARLES REBECCA JULIA
3 8 10 10 10
SUN WU KUNG INDIANA DRIVER DASHIELL GUTTERBUHG
5 30 2 5 7
POSTMAN WOMAN KEZURE DEAN COVENTRY MEPHISTO
10 8 5 10 4
OLD MAN CLARE PRIESTLY PORTER MAN
8 10 10 5 8
VILLAGER GALBRAITH
2 2
V(LostArkg)[degree(LostArkg)>mean(degree(LostArkg))] #select all the nodes for which the degree is greater than the mean degree
+ 14/32 vertices, named, from af78149:
[1] BETSY ANGELA TEDDY VIRGIL SCRAGGY
[6] MACGOWAN CHARLES REBECCA JULIA INDIANA
[11] POSTMAN DEAN COVENTRY CLARE PRIESTLY
V(LostArkg)[degree(LostArkg) == max(degree(LostArkg))] #select the node with the maximum degree
+ 1/32 vertex, named, from af78149:
[1] INDIANA
V(LostArkg)[betweenness(LostArkg) == max(betweenness(LostArkg))] #select the node(s) with the maximum betweenness
+ 1/32 vertex, named, from af78149:
[1] INDIANA
V(LostArkg)[betweenness(LostArkg) == min(betweenness(LostArkg))] #select all the nodes that share the minimum betweenness
+ 14/32 vertices, named, from af78149:
[1] BETSY YOUNG MAN ELDERLY WOMAN SCRAGGY POSTMAN
[6] WOMAN KEZURE DEAN COVENTRY OLD MAN PRIESTLY
[11] PORTER MAN VILLAGER GALBRAITH
betweenness(LostArkg)
BETSY MARCUS ANGELA YOUNG MAN TEDDY
0.0000000 0.8750000 0.8750000 0.0000000 0.8750000
VIRGIL ELDERLY WOMAN SCRAGGY MACGOWAN OLD WOMAN
0.8750000 0.0000000 0.0000000 1.0000000 5.5000000
CREW MEMBER FIRST DRAFT CHARLES REBECCA JULIA
0.5833333 5.5000000 0.8750000 0.8750000 0.8750000
SUN WU KUNG INDIANA DRIVER DASHIELL GUTTERBUHG
12.9583333 171.5000000 18.3333333 4.3333333 7.0833333
POSTMAN WOMAN KEZURE DEAN COVENTRY MEPHISTO
0.0000000 0.0000000 0.0000000 0.0000000 4.9166667
OLD MAN CLARE PRIESTLY PORTER MAN
0.0000000 4.0000000 0.0000000 0.0000000 0.0000000
VILLAGER GALBRAITH
0.0000000 0.0000000
# Size each node by its stored betweenness attribute: divided by 5 to keep
# the largest node manageable, plus 3 so zero-betweenness nodes still get a
# non-zero size. Each plotting parameter is passed exactly once.
plot(
  LostArkg,
  vertex.size = V(LostArkg)$betweenness / 5 + 3,
  vertex.color = "goldenrod",
  vertex.frame.color = "bisque",
  vertex.label.family = "Helvetica",
  vertex.label.cex = 0.4,
  vertex.label.color = "royal blue",
  edge.arrow.size = 0.1,
  edge.color = adjustcolor("light grey", 0.2)
)
Using nodal attributes, we can compare across the different nodes. Here we plot two different networks sized by indegree and outdegree. How might we interpret these results?
# Two panels with identical styling: nodes sized by out-degree on the left
# and by in-degree on the right (both doubled), edges drawn with a width
# equal to their weight. Each plotting parameter is passed exactly once.
par(mfrow = c(1, 2))
plot(
  LostArkg,
  vertex.size = V(LostArkg)$degreeOut * 2,
  vertex.color = "goldenrod",
  vertex.frame.color = "bisque",
  vertex.label.family = "Helvetica",
  vertex.label.cex = 0.3,
  vertex.label.color = "royal blue",
  edge.arrow.size = 0.1,
  edge.color = adjustcolor("light grey", 0.8),
  edge.width = E(LostArkg)$weight,
  main = "Out degree"
)
plot(
  LostArkg,
  vertex.size = V(LostArkg)$degreeIn * 2,
  vertex.color = "goldenrod",
  vertex.frame.color = "bisque",
  vertex.label.family = "Helvetica",
  vertex.label.cex = 0.3,
  vertex.label.color = "royal blue",
  edge.arrow.size = 0.1,
  edge.color = adjustcolor("light grey", 0.8),
  edge.width = E(LostArkg)$weight,
  main = "In degree"
)
par(mfrow = c(1, 1))
igraph also includes functions for calculating closeness and eigenvector centrality.
There are numerous community detection algorithms for network analysis in igraph. These algorithms allow analysts to divide a network into smaller communities for analysis of the modularity and connectedness of various subgroups. Different algorithms will be appropriate depending on which analysis you intend to conduct. Some available igraph functions include: infomap.community()
, edge.betweenness.community()
, fastgreedy.community()
and others. Here we will use the spinglass.community()
function to identify subgroups in our network.
# Fix the random seed so the community assignment is reproducible, then run
# community detection. cluster_spinglass() is the current igraph name for
# spinglass.community().
set.seed(1000)
sg1 <- cluster_spinglass(LostArkg)

# Plot the communities with a shaded background region for each community
plot(
  sg1, LostArkg,
  col = membership(sg1),
  mark.groups = communities(sg1),
  vertex.size = 15,
  vertex.label = NA,
  edge.arrow.size = 0.5,
  main = "Scene interaction communities"
)

# Plot the network with vertices colored by community membership and
# edge widths taken from the edge weight attribute
plot(
  LostArkg,
  vertex.color = membership(sg1),
  vertex.size = 15,
  vertex.frame.color = "white",
  vertex.label.cex = 0.3,
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  edge.arrow.size = 0.2,
  edge.width = E(LostArkg)$weight,
  main = "Scene interaction communities"
)
We can also compare the communities identified by several different detection algorithms. The best algorithm to use will depend on the network type and your interpretation of the results.
# Fast greedy detection is left commented out: it only works on undirected
# graphs.
# set.seed(1000)
# fg <- fastgreedy.community(LostArkg)

# Run two more community detection algorithms, using the current igraph names
# for walktrap.community() and edge.betweenness.community().
set.seed(1000)
wt <- cluster_walktrap(LostArkg)
set.seed(1000)
eb <- cluster_edge_betweenness(LostArkg)

# Draw LostArkg with vertices colored by the given community membership vector.
plot_membership <- function(membership, title) {
  plot(
    LostArkg,
    main = title,
    vertex.color = membership,
    vertex.size = 15,
    vertex.frame.color = "white",
    vertex.label.cex = 0.3,
    vertex.label.family = "Helvetica",
    vertex.label.color = "black",
    edge.arrow.size = 0.2,
    edge.width = E(LostArkg)$weight
  )
}

# Show the three partitions side by side; par() returns the previous settings
# so the panel layout can be restored once the comparison is drawn.
old_par <- par(mfrow = c(1, 3))
plot_membership(sg1$membership, "Spinglass")
plot_membership(eb$membership, "Edge Betweenness")
plot_membership(wt$membership, "Walktrap")
par(old_par)
Let's go back to the Chesapeake Bay foodweb data to analyze the reciprocity in the network. Reciprocity is often of interest to anthropologists, whether we study food sharing, social support, or other interactions. Advanced network analysis can also include assessing transitivity and triangles.
# What proportion of ties are reciprocated?
reciprocity(graph = ChesUpper)
[1] 0.1401869
# Count the dyads that are mutual, asymmetrical, or null
dyad_census(graph = ChesUpper)
$mut
[1] 15
$asym
[1] 184
$null
[1] 467
We can also calculate the average and shortest path lengths. Let's find the shortest path between Blue Crabs and Free Bacteria.
# Average path length over the network, following edge directions.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mean_distance(ChesUpper, directed = TRUE)
[1] 2.156146
# List every vertex name so the exact spelling of each node can be looked up
vertex_attr(ChesUpper, "name")
[1] "Net Phytoplankton" "Picoplankton"
[3] "Free Bacteria" "Particle Attached Bacteria"
[5] "Heteroflagellates" "Ciliates"
[7] "Rotifers" "Meroplankton"
[9] "Mesozooplankton" "Ctenophores"
[11] "Chrysaora" "Microphytobenthos"
[13] "SAV" "Benthic Bacteria"
[15] "Meiofauna" "Deposit Feeding Benthos"
[17] "Suspension Feeding Benthos" "Oysters"
[19] "Blue Crab" "Menhaden"
[21] "Bay anchovy" "Herrings and Shads"
[23] "White Perch" "Spot"
[25] "Croaker" "Hogchoker"
[27] "American Eel" "Catfish"
[29] "Striped Bass" "Bluefish"
[31] "Weakfish" "DOC"
[33] "Sediment POC" "POC"
[35] "Input" "Output"
[37] "Respiration"
# Find the shortest path from Blue Crab to Free Bacteria.
# output = "both" requests the vertices (vpath) and the edges (epath) on it.
BlueCrabtoFreeBacteria <- shortest_paths(
  ChesUpper,
  from = V(ChesUpper)[name == "Blue Crab"],
  to = V(ChesUpper)[name == "Free Bacteria"],
  output = "both"
)

# Flatten the path once; the vertex ids carry the vertex names as names().
path_edges <- unlist(BlueCrabtoFreeBacteria$epath)
path_vertices <- unlist(BlueCrabtoFreeBacteria$vpath)

# highlight shortest path. Adapted from: https://kateto.net/wp-content/uploads/2016/01/NetSciX_2016_Workshop.pdf
# Everything starts out light gray; only the path is recolored and thickened.
edgecolors <- rep("light gray", ecount(ChesUpper))
edgecolors[path_edges] <- "turquoise"
edgewidth <- rep(2, ecount(ChesUpper))
edgewidth[path_edges] <- 4
vertexcolors <- rep("light gray", vcount(ChesUpper))
vertexcolors[path_vertices] <- "turquoise"
# Label only the vertices on the path; NA suppresses every other label.
vertexnames <- rep(NA, vcount(ChesUpper))
vertexnames[path_vertices] <- names(path_vertices)

# layout_with_kk is the current igraph name for layout.kamada.kawai
plot(
  ChesUpper,
  layout = layout_with_kk,
  vertex.color = vertexcolors,
  vertex.label = vertexnames,
  vertex.size = 5,
  vertex.label.cex = 0.8,
  vertex.label.dist = 2,
  edge.color = edgecolors,
  edge.width = edgewidth,
  edge.arrow.mode = 0
)