from __future__ import print_function
from math import *
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from matplotlib import pyplot as plt
import numpy as np
import math
import random
%matplotlib inline
# simple Variables for reuse

find_node_packet = 72
nodes_packet = 2762 # given that the peer returns 16 nodes.
distance_segments = 256 # distance segments: resolution of distance metric
k = 16 # Kademlia K variable (peers per bucket)
max_peers = distance_segments * k

Peer Changing Algorithm

This calculation is a rough estimation of how a nodes routing tables evolve. It takes into consideration that nodes are likely receive nodes to FINDNODE requests that they are already familiar with.

# Returns the number of nodes after a node lookup round as a function of current nodes
def calc_node_change(current_nodes):
    if current_nodes < k:
        return k
        filled_buckets = current_nodes // k
        empty_buckets = distance_segments - filled_buckets
        factor = random.randint(1, empty_buckets + 1)
        return int(current_nodes + k*factor)
x = np.arange(1,max_peers,10)
y = [calc_node_change(val) for val in x]

plt.xlabel('current nodes')
plt.ylabel('new number of nodes')

Bandwidth for simple lookups

This section shows us the amount of inbound and outbound bandwidth consumption used by discv5 when looking for a specific node. It uses log2(n) as that is the amount of Nodes needed to contact for finding a Node in Kademlia, where n is the amount of nodes in the network itself.

def calculate_bandwidth(nodes):
    num_roundtrips = ceil(log10(nodes)) # This is the theoretical efficiency

    outbound = find_node_packet * num_roundtrips
    inbound = nodes_packet * num_roundtrips

    print("B per lookup (outbound): % 2d" %(outbound))
    print("B per lookup (inbound):  % 2d" %(inbound))
    print("B per lookup (total):    % 2d" %(outbound + inbound))

interact(calculate_bandwidth, nodes=widgets.IntSlider(min=2, max=10000, step=1, value=100, description='Nodes:'))
<function __main__.calculate_bandwidth(nodes)>

These numbers can be slightly misleading as we do not round.

Bandwidth for a discv5 node over a given period

This section shows how much bandwidth (roughly) a discv5 node consumes when running the protocol. This calculation is limited to FINDNODE & NODE requests, it currently does not account for PINGs, PONGs or handshakes.

## variables

# Make the intervals configurable, so that it doesn't have to be every 60 seconds aka (1440) like right now.
#  req_freq is in seconds
#  time_period is in hours

def calc_discv5_band(num_peers, req_freq, time_period):
    node_num_reqs_per_period = int(time_period*3600 / req_freq)    # req/time_period

    out_band_per_node_per_period = (num_peers * node_num_reqs_per_period) * (find_node_packet)
    in_band_per_node_per_period = (num_peers * node_num_reqs_per_period) * (nodes_packet)
    tot_band_per_node_per_period = (num_peers * node_num_reqs_per_period) * (find_node_packet + nodes_packet)

    print("KB per period per node (outbound): {:,}".format(out_band_per_node_per_period / 1000))
    print("KB per period per node (inbound):  {:,}".format(in_band_per_node_per_period / 1000))
    print("KB per period per node (total):    {:,}".format(tot_band_per_node_per_period / 1000))

    num_peers=widgets.IntSlider(min=1, max=10000, step=1, value=1, description='Peers:'),
    req_freq=widgets.IntSlider(min=30, max=600, step=1, value=60, description='Requests Frequency (s):'),
    time_period=widgets.IntSlider(min=1, max=24, step=1, value=1, description='Duration (h):')
<function __main__.calc_discv5_band(num_peers, req_freq, time_period)>

Needle in a haystack with ENR records indicating capabilities

To find a node with a specific capability that is encoded in its ENR, we can do the following back-of-the-envelope calculation. Assuming:

  1. We search for random node id
  2. We get k results back in response to FINDNODE
  3. We connect to 1 of those nodes that we haven't connected to before (e.g.)
  4. Concentration of nodes with specific capability is c%
  5. Node capability is evenly distributed among ids

Let's further say we have n nodes active (N.B. this seems irrelevant). What does this mean for the probability to find a node with some capability once we have received r rounds of FINDNODE results?

Assuming independent events where each event is probability that one specific ENR has our capability. By principle of inclusion and exclusion we have:

P(A u B u C) = 1 - P(!A ^ !B ^ !C)

Since P(A ^ B) = 0, etc. This trivially generalizes for more events, D, E, F.

Thus we have:

The probability of finding 1 node is...

Let's start with basic assumptions, k=16, c=0.1.

Let's start with basic assumptions, k=16, c=0.1.

P(A u ...[15 more times]) = 1 - P(!A ^ ...[15 more times]) = 1 - ((1-0.1))^16^
1 - ((1-0.1)**16)
1 - ((1-0.01)**160)

Thus we have that a concentration of 10% and 16 FINDNODE records gives us 80% chance of finding a node of a given capability after just one lookup. With 1% concentration, this requires 10 lookups.

def calc_needle_haystack(bips, k, lookups):
    concentration = (bips/100)/100
    print("Concentration:", concentration)
    probability = 1 - ((1 - concentration))**(k*lookups)
    print("Probability to find needle:", probability)

    bips=widgets.IntSlider(min=100, max=10000, step=100, value=1000, description='bips:'),
    k=widgets.IntSlider(min=1, max=40, step=1, value=16, description='k:'),
    lookups=widgets.IntSlider(min=1, max=24, step=1, value=1, description='Lookups:')
<function __main__.calc_needle_haystack(bips, k, lookups)>