build and modify graph

import networkx as nx
graph = nx.Graph()
graph.add_node(1)
graph.add_nodes_from(range(2,4))
graph.add_nodes_from(['u','v'])
graph.nodes()
[1, 2, 3, 'u', 'v']
graph.add_edge(1,2)
graph.add_edge('u','v')
graph.add_edges_from([(1,3),(1,4),(1,5),(1,6)]) # automatically ignore the repeated edges
graph.add_edge('u','w')
graph.edges()
[(1, 2), (1, 3), (1, 4), (1, 5), (1, 6), ('u', 'v'), ('u', 'w')]
graph.nodes()
[1, 2, 3, 4, 'u', 6, 'w', 5, 'v']
graph.remove_node(2)
graph.nodes()
[1, 3, 4, 'u', 6, 'w', 5, 'v']
graph.edges()
[(1, 3), (1, 4), (1, 5), (1, 6), ('u', 'v'), ('u', 'w')]
graph.remove_nodes_from([4,5])
graph.nodes()
[1, 3, 'u', 6, 'w', 'v']
graph.edges()
[(1, 3), (1, 6), ('u', 'v'), ('u', 'w')]
graph.remove_edge(1,3)
graph.remove_edges_from([(1,2),('u','v')]) # edges that do not exist are silently ignored
graph.number_of_nodes()
6
graph.number_of_edges()
2

visualize graph

g = nx.karate_club_graph() # a dataset from its install package
import matplotlib.pyplot as plt
kw = {'with_labels':True, 'node_color':'lightblue', 'edge_color':'gray'}
%matplotlib inline
nx.draw(g,**kw)

g.degree() # key is node id , value is degree
{0: 16,
 1: 9,
 2: 10,
 3: 6,
 4: 3,
 5: 4,
 6: 4,
 7: 4,
 8: 5,
 9: 2,
 10: 3,
 11: 1,
 12: 2,
 13: 5,
 14: 2,
 15: 2,
 16: 2,
 17: 2,
 18: 2,
 19: 3,
 20: 2,
 21: 2,
 22: 2,
 23: 5,
 24: 3,
 25: 3,
 26: 2,
 27: 4,
 28: 3,
 29: 4,
 30: 4,
 31: 6,
 32: 12,
 33: 17}
g.degree(33)
17
g.number_of_nodes()
34
g.number_of_edges()
78

random graph ER model

from scipy.stats import bernoulli
bernoulli.rvs(p=0.1)
0
bernoulli.rvs(p=0.5)
0
bernoulli.rvs(p=0.5)
0
bernoulli.rvs(p=0.5)
1
import itertools
g = nx.Graph()
n_nodes = 20
p = 0.2
g.add_nodes_from(range(n_nodes))
for node1, node2 in itertools.combinations(range(20),2):
    if bernoulli.rvs(p=p):
        g.add_edge(node1,node2)
nx.draw(g)

plot the degree distribution

def plot_degree_distribution(g):
    """Plot a normalized step histogram of the node degrees of ``g``.

    The histogram is drawn with ``plt.hist`` on the current matplotlib
    axes, which are then given axis labels and a title. Nothing is returned.

    Args:
        g: A graph object whose ``degree()`` method returns either a
            node -> degree mapping or an iterable of ``(node, degree)``
            pairs.
    """
    # dict() accepts both a mapping and (node, degree) pairs, so this works
    # whether degree() returns a plain dict or a degree view; calling
    # .values() on the result directly only works in the dict case.
    values = list(dict(g.degree()).values())
    # `density=True` is the replacement for the removed `normed=True` keyword.
    plt.hist(values, histtype='step', density=True)
    plt.xlabel('degree $k$')
    plt.ylabel('frequency $P(k)$')
    plt.title('degree distribution')
plot_degree_distribution(g)

def er_graph(n_nodes, p):
    """Build a random graph following the Erdos-Renyi (ER) model.

    Every unordered pair of distinct nodes is connected independently,
    with the decision for each pair taken from one Bernoulli draw.

    Args:
        n_nodes: Number of nodes; nodes are labelled ``0 .. n_nodes - 1``.
        p: Success probability passed to ``bernoulli.rvs`` for each pair.

    Returns:
        The generated ``nx.Graph``.
    """
    g = nx.Graph()
    # Add the nodes up front so nodes that end up with no edge still exist.
    g.add_nodes_from(range(n_nodes))
    # Use the caller's n_nodes and p; previously both were overwritten with
    # 20 and 0.2, so the arguments had no effect on the result.
    for node1, node2 in itertools.combinations(range(n_nodes), 2):
        if bernoulli.rvs(p=p):
            g.add_edge(node1, node2)
    return g

Descriptive Statistics of Empirical Social Networks

import numpy as np
a1 = np.loadtxt('./adj_allVillageRelationships_vilno_1.csv',delimiter=',')
a2 = np.loadtxt('./adj_allVillageRelationships_vilno_2.csv',delimiter=',')
a1.shape, a2.shape
((843, 843), (877, 877))
g1 = nx.to_networkx_graph(a1)
g2 = nx.to_networkx_graph(a2)
def basic_net_stats(g):
    """Print the node count, edge count and average degree of ``g``.

    Args:
        g: A graph object providing ``number_of_nodes()``,
            ``number_of_edges()`` and ``degree()``. ``degree()`` may return
            either a node -> degree mapping or an iterable of
            ``(node, degree)`` pairs.
    """
    print('number of nodes ', g.number_of_nodes())
    print('number of edges ', g.number_of_edges())
    # dict() normalizes both return shapes of degree(); calling .values()
    # on the result directly only works when it is already a dict.
    degrees = list(dict(g.degree()).values())
    print('average degree ', np.mean(degrees))
basic_net_stats(g1)
number of nodes  843
number of edges  3405
average degree  8.07829181495
basic_net_stats(g2)
number of nodes  877
number of edges  3063
average degree  6.98517673888
plot_degree_distribution(g1) 
# The ER model is not a good fit for human relationship networks in this case.

Largest Connected Component

gen = nx.connected_component_subgraphs(g1)
next(gen).number_of_nodes()
825
next(gen).number_of_nodes()
3
next(gen).number_of_nodes()
3
next(gen).number_of_nodes()
4
next(gen).number_of_nodes()
2
next(gen).number_of_nodes()
4
next(gen).number_of_nodes()
1
next(gen).number_of_nodes()
1
next(gen).number_of_nodes()
---------------------------------------------------------------------------

StopIteration                             Traceback (most recent call last)

<ipython-input-80-992d1a5e6975> in <module>()
----> 1 next(gen).number_of_nodes()


StopIteration: 
len(g1) == g1.number_of_nodes()
True
g1_lcc = max(nx.connected_component_subgraphs(g1), key=len)
plt.figure()
nx.draw(g1_lcc, node_color='red',edge_color='gray',node_size=20) # be patient 

g2_lcc = max(nx.connected_component_subgraphs(g2), key=len)
g1_lcc.number_of_nodes(), g2_lcc.number_of_nodes()
(825, 810)
g1_lcc.number_of_nodes()/len(g1), g2_lcc.number_of_nodes()/len(g2)
(0.9786476868327402, 0.9236031927023945)