from __future__ import print_function
from math import *
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from matplotlib import pyplot as plt
import numpy as np
import math
import random
%matplotlib inline

# simple Variables for reuse

find_node_packet = 72
nodes_packet = 2762 # given that the peer returns 16 nodes.
distance_segments = 256 # distance segments: resolution of distance metric
k = 16 # Kademlia K variable (peers per bucket)
max_peers = distance_segments * k


## Peer Changing Algorithm¶

This calculation is a rough estimation of how a nodes routing tables evolve. It takes into consideration that nodes are likely receive nodes to FINDNODE requests that they are already familiar with.

# Returns the number of nodes after a node lookup round as a function of current nodes
def calc_node_change(current_nodes):
if current_nodes < k:
return k
else:
filled_buckets = current_nodes // k
empty_buckets = distance_segments - filled_buckets
factor = random.randint(1, empty_buckets + 1)
return int(current_nodes + k*factor)

x = np.arange(1,max_peers,10)
y = [calc_node_change(val) for val in x]
plt.figure(figsize=[10,10])

plt.plot(x,y)
plt.xlabel('current nodes')
plt.ylabel('new number of nodes')
plt.grid(True)
plt.show()


# Bandwidth for simple lookups¶

This section shows us the amount of inbound and outbound bandwidth consumption used by discv5 when looking for a specific node. It uses log2(n) as that is the amount of Nodes needed to contact for finding a Node in Kademlia, where n is the amount of nodes in the network itself.

def calculate_bandwidth(nodes):
num_roundtrips = ceil(log10(nodes)) # This is the theoretical efficiency

outbound = find_node_packet * num_roundtrips
inbound = nodes_packet * num_roundtrips

print("B per lookup (outbound): % 2d" %(outbound))
print("B per lookup (inbound):  % 2d" %(inbound))
print("B per lookup (total):    % 2d" %(outbound + inbound))

interact(calculate_bandwidth, nodes=widgets.IntSlider(min=2, max=10000, step=1, value=100, description='Nodes:'))

<function __main__.calculate_bandwidth(nodes)>

These numbers can be slightly misleading as we do not round.

# Bandwidth for a discv5 node over a given period¶

This section shows how much bandwidth (roughly) a discv5 node consumes when running the protocol. This calculation is limited to FINDNODE & NODE requests, it currently does not account for PINGs, PONGs or handshakes.

## variables

# Make the intervals configurable, so that it doesn't have to be every 60 seconds aka (1440) like right now.
#  req_freq is in seconds
#  time_period is in hours

def calc_discv5_band(num_peers, req_freq, time_period):
node_num_reqs_per_period = int(time_period*3600 / req_freq)    # req/time_period

out_band_per_node_per_period = (num_peers * node_num_reqs_per_period) * (find_node_packet)
in_band_per_node_per_period = (num_peers * node_num_reqs_per_period) * (nodes_packet)
tot_band_per_node_per_period = (num_peers * node_num_reqs_per_period) * (find_node_packet + nodes_packet)

print("KB per period per node (outbound): {:,}".format(out_band_per_node_per_period / 1000))
print("KB per period per node (inbound):  {:,}".format(in_band_per_node_per_period / 1000))
print("KB per period per node (total):    {:,}".format(tot_band_per_node_per_period / 1000))

interact(
calc_discv5_band,
num_peers=widgets.IntSlider(min=1, max=10000, step=1, value=1, description='Peers:'),
req_freq=widgets.IntSlider(min=30, max=600, step=1, value=60, description='Requests Frequency (s):'),
time_period=widgets.IntSlider(min=1, max=24, step=1, value=1, description='Duration (h):')
)

<function __main__.calc_discv5_band(num_peers, req_freq, time_period)>

# Needle in a haystack with ENR records indicating capabilities¶

To find a node with a specific capability that is encoded in its ENR, we can do the following back-of-the-envelope calculation. Assuming:

1. We search for random node id
2. We get k results back in response to FINDNODE
3. We connect to 1 of those nodes that we haven't connected to before (e.g.)
4. Concentration of nodes with specific capability is c%
5. Node capability is evenly distributed among ids

Let's further say we have n nodes active (N.B. this seems irrelevant). What does this mean for the probability to find a node with some capability once we have received r rounds of FINDNODE results?

Assuming independent events where each event is probability that one specific ENR has our capability. By principle of inclusion and exclusion we have:

P(A u B u C) = 1 - P(!A ^ !B ^ !C)

Since P(A ^ B) = 0, etc. This trivially generalizes for more events, D, E, F.

Thus we have:

The probability of finding 1 node is...

P(A u ...[15 more times]) = 1 - P(!A ^ ...[15 more times]) = 1 - ((1-0.1))^16^

1 - ((1-0.1)**16)

0.8146979811148158
1 - ((1-0.01)**160)

0.7997229731425107

Thus we have that a concentration of 10% and 16 FINDNODE records gives us 80% chance of finding a node of a given capability after just one lookup. With 1% concentration, this requires 10 lookups.

def calc_needle_haystack(bips, k, lookups):
concentration = (bips/100)/100
print("Concentration:", concentration)
probability = 1 - ((1 - concentration))**(k*lookups)
print("Probability to find needle:", probability)

interact(
calc_needle_haystack,
bips=widgets.IntSlider(min=100, max=10000, step=100, value=1000, description='bips:'),
k=widgets.IntSlider(min=1, max=40, step=1, value=16, description='k:'),
lookups=widgets.IntSlider(min=1, max=24, step=1, value=1, description='Lookups:')
)

<function __main__.calc_needle_haystack(bips, k, lookups)>