1
0

Initial commit

This commit is contained in:
2022-03-17 17:05:34 +01:00
commit eac77e615d
72 changed files with 44950 additions and 0 deletions

18
analysis/README Normal file
View File

@@ -0,0 +1,18 @@
The bytecounts.py script produces the formulas that are coded into the
analytical.py script.
The analytical.py script produces the formulas that are coded into the
plotdats.py script.
You shouldn't have to touch either of those, unless the simulator itself
changes.
If you're just plotting the output of simulator logfiles, just do:
$ ./parselogs.py ../path/to/*.log
(the above will generate 5 .dat files)
$ ./plotdats.py
(the above will generate a bunch of .pdf graphs)

218
analysis/analytical.py Executable file
View File

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
# Compute analytical formulas for the bytes used per epoch by the
# various modes
import os
import sympy
from bwparser import BandwidthParser
# Choose the reference network size.  With BW_ALGO=komlo the original
# fixed assumption of 6500 relays is kept; for any other setting the
# relay count is read from the bandwidth file named by $BW_FILE.
bandwidth_file = os.getenv('BW_FILE')
if os.getenv('BW_ALGO') == "komlo":
    network_size = 6500
else:
    bw_parser = BandwidthParser(bw_file=bandwidth_file)
    network_size = bw_parser.get_relay_num()
# One sympy symbol per model parameter and per simulator message type.
# The names on the left must stay in the same order as the names inside
# the string.  DirAuthUploadDescMsg is listed twice on both sides; this
# is harmless because sympy returns the same symbol for the same name.
(
    A, R_B, R_N, R, logR, C_B, C_N, C, gamma, circ, P_Delta,
    DirAuthConsensusMsg, DirAuthENDIVEDiffMsg, DirAuthGetConsensusMsg,
    DirAuthGetENDIVEDiffMsg, DirAuthUploadDescMsg, DirAuthENDIVEMsg,
    DirAuthGetENDIVEMsg, DirAuthUploadDescMsg, RelayConsensusMsg,
    RelayDescMsg, RelayGetConsensusMsg, RelayGetDescMsg,
    SinglePassCreateCircuitMsgNotLast, SinglePassCreateCircuitMsgLast,
    SinglePassCreatedCircuitCellLast, SinglePassCreatedCircuitCellMiddle,
    SinglePassCreatedCircuitCellFirst, TelescopingCreateCircuitMsg,
    TelescopingCreatedCircuitCell, TelescopingExtendCircuitCell,
    TelescopingExtendedCircuitCell, VanillaCreateCircuitMsg,
    VanillaCreatedCircuitCell, VanillaExtendCircuitCell,
    VanillaExtendedCircuitCell, DirAuthGetConsensusDiffMsg,
    DirAuthConsensusDiffMsg, RelayGetConsensusDiffMsg,
    RelayConsensusDiffMsg,
) = sympy.symbols("""
A, R_B, R_N, R, logR, C_B, C_N, C, gamma, circ, P_Delta,
DirAuthConsensusMsg, DirAuthENDIVEDiffMsg, DirAuthGetConsensusMsg,
DirAuthGetENDIVEDiffMsg, DirAuthUploadDescMsg, DirAuthENDIVEMsg,
DirAuthGetENDIVEMsg, DirAuthUploadDescMsg, RelayConsensusMsg,
RelayDescMsg, RelayGetConsensusMsg, RelayGetDescMsg,
SinglePassCreateCircuitMsgNotLast, SinglePassCreateCircuitMsgLast,
SinglePassCreatedCircuitCellLast, SinglePassCreatedCircuitCellMiddle,
SinglePassCreatedCircuitCellFirst, TelescopingCreateCircuitMsg,
TelescopingCreatedCircuitCell, TelescopingExtendCircuitCell,
TelescopingExtendedCircuitCell, VanillaCreateCircuitMsg,
VanillaCreatedCircuitCell, VanillaExtendCircuitCell,
VanillaExtendedCircuitCell, DirAuthGetConsensusDiffMsg,
DirAuthConsensusDiffMsg, RelayGetConsensusDiffMsg,
RelayConsensusDiffMsg
""")
# Model parameters shared by every mode, as (symbol, replacement) pairs
# for sympy's subs().  Order matters: each pair comes before the pairs
# that define the symbols used on its right-hand side (R_N before R_B,
# circ before gamma and C), so one in-order pass reduces everything to
# an expression in R.
globalsubs = [
    (A, 9),            # presumably the number of dirauths -- TODO confirm
    (R_N, R - R_B),
    (R_B, 0.010 * R),
    (C_N, C - C_B),
    (C_B, 0.16 * C),
    (circ, gamma * C),
    (gamma, 8.9),
    (C, 2500000*R/network_size),
    (P_Delta, 0.019),
]
# The actual sizes in bytes of each message type were logged by
# uncommenting this line in network.py:
# logging.info("%s size %d", type(self).__name__, sz)

# Message sizes for the single-pass / Merkle mode.  The ENDIVE diff
# entry is listed before the ENDIVE entry so that the DirAuthENDIVEMsg
# inside it is resolved by the following pair.
singlepass_merkle_subs = [
    (DirAuthConsensusMsg, 877),
    (DirAuthGetConsensusMsg, 41),
    (DirAuthGetENDIVEMsg, 38),
    (DirAuthGetENDIVEDiffMsg, 42),
    (DirAuthENDIVEDiffMsg, (P_Delta * DirAuthENDIVEMsg).subs(globalsubs)),
    (DirAuthENDIVEMsg, 274 * R),
    (DirAuthUploadDescMsg, 425),
    (RelayConsensusMsg, 873),
    (RelayDescMsg, 415),
    (RelayGetConsensusMsg, 37),
    (RelayGetDescMsg, 32),
    (SinglePassCreateCircuitMsgLast, 187),
    (SinglePassCreateCircuitMsgNotLast, 239),
    (SinglePassCreatedCircuitCellFirst, 1426+82*logR),
    (SinglePassCreatedCircuitCellMiddle, 903+41*logR),
    (SinglePassCreatedCircuitCellLast, 190),
]
# Message sizes for the single-pass / threshold-signature mode.  Here
# the ENDIVE diff is substituted by the full ENDIVE message, which the
# next pair then turns into a size.
singlepass_threshsig_subs = [
    (DirAuthConsensusMsg, 789),
    (DirAuthGetConsensusMsg, 41),
    (DirAuthGetENDIVEMsg, 38),
    (DirAuthGetENDIVEDiffMsg, 42),
    (DirAuthENDIVEDiffMsg, DirAuthENDIVEMsg),
    (DirAuthENDIVEMsg, 348*R),
    (DirAuthUploadDescMsg, 425),
    (RelayConsensusMsg, 784),
    (RelayDescMsg, 415),
    (RelayGetConsensusMsg, 37),
    (RelayGetDescMsg, 32),
    (SinglePassCreateCircuitMsgLast, 187),
    (SinglePassCreateCircuitMsgNotLast, 239),
    (SinglePassCreatedCircuitCellFirst, 1554),
    (SinglePassCreatedCircuitCellMiddle, 969),
    (SinglePassCreatedCircuitCellLast, 190),
]
# Message sizes for the telescoping / Merkle mode.  As in the other
# Merkle table, the ENDIVE diff pair must precede the ENDIVE pair.
telescoping_merkle_subs = [
    (DirAuthConsensusMsg, 877),
    (DirAuthGetConsensusMsg, 41),
    (DirAuthGetENDIVEMsg, 38),
    (DirAuthGetENDIVEDiffMsg, 42),
    (DirAuthENDIVEDiffMsg, (P_Delta * DirAuthENDIVEMsg).subs(globalsubs)),
    (DirAuthENDIVEMsg, 234 * R),
    (DirAuthUploadDescMsg, 372),
    (RelayConsensusMsg, 873),
    (RelayGetConsensusMsg, 37),
    (RelayGetDescMsg, 32),
    (RelayDescMsg, 362),
    (TelescopingCreateCircuitMsg, 120),
    (TelescopingCreatedCircuitCell, 179),
    (TelescopingExtendCircuitCell, 122),
    (TelescopingExtendedCircuitCell, 493+41*logR),
]
# Message sizes for the telescoping / threshold-signature mode.  The
# ENDIVE diff is substituted by the full ENDIVE message, which the next
# pair then turns into a size.
telescoping_threshsig_subs = [
    (DirAuthConsensusMsg, 789),
    (DirAuthGetConsensusMsg, 41),
    (DirAuthGetENDIVEMsg, 38),
    (DirAuthGetENDIVEDiffMsg, 42),
    (DirAuthENDIVEDiffMsg, DirAuthENDIVEMsg),
    (DirAuthENDIVEMsg, 307*R),
    (DirAuthUploadDescMsg, 372),
    (RelayConsensusMsg, 788),
    (RelayGetConsensusMsg, 37),
    (RelayGetDescMsg, 32),
    (RelayDescMsg, 362),
    (TelescopingCreateCircuitMsg, 120),
    (TelescopingCreatedCircuitCell, 179),
    (TelescopingExtendCircuitCell, 122),
    (TelescopingExtendedCircuitCell, 556),
]
# Message sizes for the vanilla mode.  The entries chain: the dirauth
# diff and the dirauth consensus are expressed via later entries, and
# everything finally resolves through RelayConsensusMsg = 219*R, so the
# order of the pairs must be kept.
vanilla_subs = [
    (DirAuthConsensusDiffMsg, (P_Delta * DirAuthConsensusMsg).subs(globalsubs)),
    (DirAuthConsensusMsg, RelayConsensusMsg),
    (DirAuthGetConsensusDiffMsg, 45),
    (DirAuthGetConsensusMsg, 41),
    (DirAuthUploadDescMsg, 372),
    (RelayConsensusDiffMsg, (P_Delta * RelayConsensusMsg).subs(globalsubs)),
    (RelayConsensusMsg, 219*R),
    (RelayGetConsensusDiffMsg, 41),
    (RelayGetConsensusMsg, 37),
    (VanillaCreateCircuitMsg, 116),
    (VanillaCreatedCircuitCell, 175),
    (VanillaExtendCircuitCell, 157),
    (VanillaExtendedCircuitCell, 176),
]
# The formulas were output by bytecounts.py.  Each total is a sum of
# "population * bytes per member" terms; the *_totrelay formulas count
# bytes seen by relays, the *_totclient formulas bytes seen by clients.
singlepass_totrelay = (
    R_N * (DirAuthConsensusMsg + DirAuthENDIVEDiffMsg
           + DirAuthGetConsensusMsg + DirAuthGetENDIVEDiffMsg
           + A*DirAuthUploadDescMsg)
    + R_B * (DirAuthConsensusMsg + DirAuthENDIVEMsg
             + DirAuthGetConsensusMsg + DirAuthGetENDIVEMsg
             + A*DirAuthUploadDescMsg)
    + C * (RelayConsensusMsg + RelayDescMsg
           + RelayGetConsensusMsg + RelayGetDescMsg)
    + circ * (3*SinglePassCreateCircuitMsgNotLast
              + 2*SinglePassCreateCircuitMsgLast
              + 2*SinglePassCreatedCircuitCellLast
              + 2*SinglePassCreatedCircuitCellMiddle
              + SinglePassCreatedCircuitCellFirst + 20)
)
singlepass_totclient = (
    C * (RelayConsensusMsg + RelayDescMsg
         + RelayGetConsensusMsg + RelayGetDescMsg)
    + circ * (SinglePassCreateCircuitMsgNotLast
              + SinglePassCreatedCircuitCellFirst + 4)
)
telescoping_totrelay = (
    R_N * (DirAuthConsensusMsg + DirAuthENDIVEDiffMsg
           + DirAuthGetConsensusMsg + DirAuthGetENDIVEDiffMsg
           + A*DirAuthUploadDescMsg)
    + R_B * (DirAuthConsensusMsg + DirAuthENDIVEMsg
             + DirAuthGetConsensusMsg + DirAuthGetENDIVEMsg
             + A*DirAuthUploadDescMsg)
    + C * (RelayConsensusMsg + RelayDescMsg
           + RelayGetConsensusMsg + RelayGetDescMsg)
    + circ * (5*TelescopingCreateCircuitMsg
              + 5*TelescopingCreatedCircuitCell
              + 4*TelescopingExtendCircuitCell
              + 4*TelescopingExtendedCircuitCell + 52)
)
telescoping_totclient = (
    C * (RelayConsensusMsg + RelayDescMsg
         + RelayGetConsensusMsg + RelayGetDescMsg)
    + circ * (TelescopingCreateCircuitMsg
              + TelescopingCreatedCircuitCell
              + 2*TelescopingExtendCircuitCell
              + 2*TelescopingExtendedCircuitCell + 20)
)
vanilla_totrelay = (
    R_N * (DirAuthConsensusDiffMsg + DirAuthGetConsensusDiffMsg
           + A*DirAuthUploadDescMsg)
    + R_B * (DirAuthConsensusMsg + DirAuthGetConsensusMsg
             + A*DirAuthUploadDescMsg)
    + C_N * (RelayConsensusDiffMsg + RelayGetConsensusDiffMsg)
    + C_B * (RelayConsensusMsg + RelayGetConsensusMsg)
    + circ * (5*VanillaCreateCircuitMsg
              + 5*VanillaCreatedCircuitCell
              + 4*VanillaExtendCircuitCell
              + 4*VanillaExtendedCircuitCell + 52)
)
vanilla_totclient = (
    C_N * (RelayConsensusDiffMsg + RelayGetConsensusDiffMsg)
    + C_B * (RelayConsensusMsg + RelayGetConsensusMsg)
    + circ * (VanillaCreateCircuitMsg
              + VanillaCreatedCircuitCell
              + 2*VanillaExtendCircuitCell
              + 2*VanillaExtendedCircuitCell + 20)
)
# Copy the output into plotdats.py, replacing 'R' by 'x' and 'logR' by
# 'ceil(log(x)/log(2))'
def _gnuplot_formula(total, mode_subs):
    """Return the per-client form of *total* as a gnuplot expression string.

    *total* is one of the module-level total-bytes formulas and
    *mode_subs* the matching list of message-size substitutions.  The
    formula is divided by C, reduced with globalsubs and then mode_subs,
    simplified, and rendered as text.

    The expression is converted with str() before the textual rewrite:
    calling .replace() with plain strings on a sympy expression is not a
    text replacement.  'logR' is rewritten before 'R' so that the second
    rewrite cannot turn it into 'logx'.
    """
    per_client = (total / C).subs(globalsubs).subs(mode_subs).simplify()
    formula = str(per_client)
    return formula.replace("logR", "ceil(log(x)/log(2))").replace("R", "x")


def calculate_relay_analyticals():
    """Return {mode name: gnuplot formula} for relay bytes per client.

    The formulas are functions of x, which stands for the symbol R.
    """
    return {
        'singlepass_merkle': _gnuplot_formula(singlepass_totrelay, singlepass_merkle_subs),
        'singlepass_threshsig': _gnuplot_formula(singlepass_totrelay, singlepass_threshsig_subs),
        'telescoping_merkle': _gnuplot_formula(telescoping_totrelay, telescoping_merkle_subs),
        'telescoping_threshsig': _gnuplot_formula(telescoping_totrelay, telescoping_threshsig_subs),
        'vanilla_none': _gnuplot_formula(vanilla_totrelay, vanilla_subs),
    }


def calculate_client_analyticals():
    """Return {mode name: gnuplot formula} for client bytes per client.

    The formulas are functions of x, which stands for the symbol R.
    """
    return {
        'singlepass_merkle': _gnuplot_formula(singlepass_totclient, singlepass_merkle_subs),
        'singlepass_threshsig': _gnuplot_formula(singlepass_totclient, singlepass_threshsig_subs),
        'telescoping_merkle': _gnuplot_formula(telescoping_totclient, telescoping_merkle_subs),
        'telescoping_threshsig': _gnuplot_formula(telescoping_totclient, telescoping_threshsig_subs),
        'vanilla_none': _gnuplot_formula(vanilla_totclient, vanilla_subs),
    }
if __name__ == '__main__':
    # Print the client formulas first, then the relay formulas.
    client_formulas = calculate_client_analyticals()
    print("Client analyticals: ", client_formulas)
    relay_formulas = calculate_relay_analyticals()
    print("Relay analyticals: ", relay_formulas)

250
analysis/bytecounts.py Executable file
View File

@@ -0,0 +1,250 @@
#!/usr/bin/env python3
import random # For simulation, not cryptography!
import math
import sys
import os
import logging
import resource
import sympy
import re
import statistics
sys.path.append("..")
import network
import dirauth
import relay
import client
from bwparser import JansenBandwidthParser, KomloBandwidthParser
class BandwidthMeasurer:
    """Drive a small simulated network and collect per-epoch byte counts.

    The constructor starts the dirauths, relays and clients, ticks the
    epoch twice, and leaves the network ready for measurements taken
    with getstats() / endepoch().
    """

    def __init__(self, total_numrelays, bw_parser, numdirauths, numrelays, numclients):
        """Build the network.

        total_numrelays -- stored as self.total_size
        bw_parser       -- object whose get_distribution() supplies the
                           bandwidths that new relays are drawn from
        numdirauths, numrelays, numclients -- how many of each to start
        """
        self.total_size = total_numrelays
        self.bw_parser = bw_parser

        # Start some dirauths
        self.dirauthaddrs = []
        self.dirauths = []
        for i in range(numdirauths):
            dira = dirauth.DirAuth(i, numdirauths)
            self.dirauths.append(dira)
            self.dirauthaddrs.append(dira.netaddr)

        # Start some relays
        self.relays = []
        for _ in range(numrelays):
            self.startrelay()

        # The fallback relays are a hardcoded list of a small fraction
        # of the relays, used by clients for bootstrapping.  Here that
        # is just the first relay.
        fallbackrelays = self.relays[0:1]
        for r in fallbackrelays:
            r.set_is_fallbackrelay()
        network.thenetwork.setfallbackrelays(fallbackrelays)

        # Tick the epoch to build the first consensus
        network.thenetwork.nextepoch()

        # Start some clients
        self.clients = []
        for _ in range(numclients):
            self.startclient()

        # Throw away all the performance statistics to this point
        for d in self.dirauths:
            d.perfstats.reset()
        for r in self.relays:
            r.perfstats.reset()
        # The clients' stats are already at 0, but they have the
        # "bootstrapping" flag set, which we want to keep, so we
        # won't reset them.

        self.allcircs = []

        # Tick the epoch to bootstrap the clients
        network.thenetwork.nextepoch()

    def startrelay(self):
        """Start one more relay with a randomly drawn bandwidth."""
        bw = int(self.calculate_relay_bandwidth())
        new_relay = relay.Relay(self.dirauthaddrs, bw, 0)
        self.relays.append(new_relay)

    def calculate_relay_bandwidth(self):
        """Return a bandwidth picked at random from the parser's distribution."""
        # Use this instance's parser, not a module-level global, so the
        # class also works when it is imported rather than run as a script.
        return random.choice(self.bw_parser.get_distribution())

    def stoprelay(self):
        """Terminate and forget the relay at index 1 (index 0 is the fallback)."""
        self.relays[1].terminate()
        del self.relays[1]

    def startclient(self):
        """Start one more client."""
        self.clients.append(client.Client(self.dirauthaddrs))

    def stopclient(self):
        """Terminate and forget the first client."""
        self.clients[0].terminate()
        del self.clients[0]

    def buildcircuit(self):
        """Have the first client build a circuit and remember it."""
        # Operate on self rather than on a module-level instance.
        self.allcircs.append(self.clients[0].channelmgr.new_circuit())

    @staticmethod
    def _tally(entities):
        """Return (sent, received, sent+received) summed over *entities*."""
        sent = 0
        recv = 0
        both = 0
        for entity in entities:
            perf = entity.perfstats
            logging.debug("%s", perf)
            sent += perf.bytes_sent
            recv += perf.bytes_received
            both += perf.bytes_sent + perf.bytes_received
        return sent, recv, both

    def getstats(self):
        """Log the byte totals and return (dirabytes, relaybytes, clibytes).

        Each returned value is bytes sent plus bytes received, summed
        over all dirauths, relays and clients respectively.  As a side
        effect the is_bootstrapping flag of every entity is cleared.
        """
        dirasent, dirarecv, dirabytes = self._tally(self.dirauths)
        relaysent, relayrecv, relaybytes = self._tally(self.relays)
        clisent, clirecv, clibytes = self._tally(self.clients)
        totsent = dirasent + relaysent + clisent
        totrecv = dirarecv + relayrecv + clirecv
        totbytes = dirabytes + relaybytes + clibytes

        logging.info("DirAuths sent=%s recv=%s bytes=%s",
                     dirasent, dirarecv, dirabytes)
        logging.info("Relays sent=%s recv=%s bytes=%s",
                     relaysent, relayrecv, relaybytes)
        logging.info("Client sent=%s recv=%s bytes=%s",
                     clisent, clirecv, clibytes)
        logging.info("Total sent=%s recv=%s bytes=%s",
                     totsent, totrecv, totbytes)

        # Reset bootstrap flag
        for d in self.dirauths:
            d.perfstats.is_bootstrapping = False
        for r in self.relays:
            r.perfstats.is_bootstrapping = False
        for c in self.clients:
            c.perfstats.is_bootstrapping = False

        return (dirabytes, relaybytes, clibytes)

    def endepoch(self):
        """Close all circuits, reset every entity's stats, and tick the epoch."""
        # Close circuits
        for circuit in self.allcircs:
            circuit.close()
        self.allcircs = []

        # Reset stats
        for d in self.dirauths:
            d.perfstats.reset()
        for r in self.relays:
            r.perfstats.reset()
        for c in self.clients:
            c.perfstats.reset()

        network.thenetwork.nextepoch()
if __name__ == '__main__':
    # Args: womode snipauthmode numrelays randseed
    if len(sys.argv) != 5:
        sys.stderr.write("Usage: womode snipauthmode numrelays randseed\n")
        sys.exit(1)

    bandwidth_file = os.getenv('BW_FILE')
    womode = network.WOMode[sys.argv[1].upper()]
    snipauthmode = network.SNIPAuthMode[sys.argv[2].upper()]
    numrelays = int(sys.argv[3])
    randseed = int(sys.argv[4])

    if os.getenv('BW_ALGO') == "komlo":
        # keep the original assumption
        bw_parser = KomloBandwidthParser(sample_size=numrelays)
    else:
        bw_parser = JansenBandwidthParser(bw_file=bandwidth_file, sample_size=numrelays)
    total_size = bw_parser.get_relay_num()

    # Use symbolic byte counter mode
    network.symbolic_byte_counters = True

    # Seed the PRNG.  On Ubuntu 18.04, this in fact makes future calls
    # to (non-cryptographic) random numbers deterministic.  On Ubuntu
    # 16.04, it does not.
    random.seed(randseed)

    loglevel = logging.INFO
    # Uncomment to see all the debug messages
    # loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel,
            format="%(asctime)s:%(levelname)s:%(message)s")

    logging.info("Starting simulation")

    # Set the Walking Onions style to use
    network.thenetwork.set_wo_style(womode, snipauthmode)

    bwm = BandwidthMeasurer(total_size, bw_parser, 9, numrelays, 0)
    stats = dict()

    # Keys are (R_N, R_B, C_N, C_B, circs) for each measured scenario.
    steady = (numrelays, 0, 0, 0, 0)
    relay_boot = (numrelays, 1, 0, 0, 0)
    client_boot = (numrelays, 0, 0, 1, 0)
    client_steady = (numrelays, 0, 1, 0, 0)
    one_circ = (numrelays, 0, 1, 0, 1)
    two_circs = (numrelays, 0, 1, 0, 2)

    logging.info("R_N = %d, R_B = 0, C_N = 0, C_B = 0, circs = 0", numrelays)
    stats[steady] = bwm.getstats()

    # Bootstrap one relay
    bwm.startrelay()
    bwm.endepoch()
    logging.info("R_N = %d, R_B = 1, C_N = 0, C_B = 0, circs = 0", numrelays)
    stats[relay_boot] = bwm.getstats()

    # Bootstrap one client
    bwm.stoprelay()
    bwm.startclient()
    bwm.endepoch()
    logging.info("R_N = %d, R_B = 0, C_N = 0, C_B = 1, circs = 0", numrelays)
    stats[client_boot] = bwm.getstats()

    # No changes, so the client is now not bootstrapping
    bwm.endepoch()
    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 0", numrelays)
    stats[client_steady] = bwm.getstats()

    # No more bootstrapping, but build one circuit
    bwm.buildcircuit()
    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 1", numrelays)
    stats[one_circ] = bwm.getstats()
    bwm.endepoch()

    # No more bootstrapping, but build two circuits
    bwm.buildcircuit()
    bwm.buildcircuit()
    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 2", numrelays)
    stats[two_circs] = bwm.getstats()
    bwm.endepoch()

    # getstats() returns (dirauth, relay, client) totals; report the
    # relay column (index 1) and then the client column (index 2).
    for heading, who in (('Total relay bytes:', 1), ('Total client bytes:', 2)):
        print("\n")
        print(heading)
        print(' R_N * (', stats[steady][who]/numrelays, ')')
        print('+ R_B * (', stats[relay_boot][who] - stats[steady][who], ')')
        print('+ C_N * (', stats[client_steady][who] - stats[steady][who], ')')
        print('+ C_B * (', stats[client_boot][who] - stats[steady][who], ')')
        print('+ circ * (', stats[one_circ][who] - stats[client_steady][who], ')')
        print(' check ', stats[two_circs][who] - stats[one_circ][who])

48
analysis/calcprop.py Executable file
View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python3
import os
import sys
import math
import numpy as np
from bwparser import JansenBandwidthParser, KomloBandwidthParser
if __name__ == '__main__':
    # Report, for every line of each mode's .dat file, the relay
    # throughput, the relay traffic and the ratio between the two.

    # sensible defaults
    bandwidth_file = os.getenv('BW_FILE')

    if len(sys.argv) < 3:
        # Report usage errors on stderr with a failing status, like the
        # other scripts in this directory.
        sys.stderr.write("Usage: calcprop.py scale epoch_count\n")
        sys.exit(1)

    scale = float(sys.argv[1])
    # NOTE(review): the reason for adding 3 epochs is not visible in
    # this file -- confirm against the simulator.
    epoch_count = int(sys.argv[2]) + 3

    if os.getenv('BW_ALGO') != "komlo":
        bw_parser = JansenBandwidthParser(bw_file=bandwidth_file, netscale=scale)
    else:
        bw_parser = KomloBandwidthParser(sample_size=math.ceil(6500*scale))

    dats = [
        ('vanilla_none.dat', 'Vanilla'),
        ('singlepass_merkle.dat', 'Sing(M)'),
        ('telescoping_merkle.dat', 'Tele(M)'),
        ('singlepass_threshsig.dat', 'Sing(T)'),
        ('telescoping_threshsig.dat', 'Tele(T)'),
    ]

    for dat_path, name in dats:
        with open(dat_path) as f:
            for line in f:
                # Fourth space-separated field of the .dat line.
                total_bytes = float(line.split(" ")[3])
                net_size = bw_parser.get_relay_num()
                throughput = bw_parser.get_relay_throughput()
                relay_traffic = (net_size*total_bytes)/(epoch_count * 1000)
                prop = relay_traffic/throughput
                print("===================================")
                print(name)
                print("Throughput: " + np.format_float_scientific(throughput, precision=3))
                print("Total bytes: " + np.format_float_scientific(relay_traffic, precision=3))
                print("Proportion: " + np.format_float_scientific(prop, precision=3))

281
analysis/parselogs.py Executable file
View File

@@ -0,0 +1,281 @@
#!/usr/bin/env python3
# Parse the log files output by the simulator to get the overall average
# and stddev of total bytes sent+received per epoch for each of:
# - dirauths
# - total relays
# - bootstrapping fallback relays per bw
# - non-bootstrapping fallback relays per bw
# - bootstrapping non-fallback relays per bw
# - non-bootstrapping non-fallback relays per bw
# - total clients
# - bootstrapping clients
# - non-bootstrapping clients
# - steady-state dirauths (skipping the first epoch in which all clients
# are bootstrapping)
# - steady-state relays (as above)
# - steady-state clients (as above)
# - steady-state total relay traffic per client (as above)
# Pass the names of the log files as command-line arguments, or the log
# files themselves on stdin.
# The output will be five files named:
# - vanilla_none.dat
# - telescoping_merkle.dat
# - telescoping_threshsig.dat
# - singlepass_merkle.dat
# - singlepass_threshsig.dat
# Each file will contain one line for every network scale seen in the
# logs.  The line will consist of 29 fields:
# - the network scale multiplied by the reference network size (float)
# - the mean and stddev (two floats) for each of the 14 statistics kept
# by StatBundle: the 13 classes above, in the above order, plus the
# client circuit building time, which is inserted right after "total
# clients"; for the steady states, the stats are computed over the
# per-epoch averages, while for the others, the stats are computed on
# a per-entity basis
import math
import re
import os
import fileinput
from bwparser import BandwidthParser
# Names of the simulation modes; each one is also the stem of a .dat file.
modes = [
    "vanilla_none",
    "telescoping_merkle",
    "telescoping_threshsig",
    "singlepass_merkle",
    "singlepass_threshsig",
]
class StatAccum:
    """Accumulate (mean, stddev, N) triples to compute an overall (mean,
    stddev, N).

    Only the running sum, the running sum of squares and the sample
    count are kept, so triples can be merged in any order.
    """

    def __init__(self):
        self.sumX = 0
        self.sumXsq = 0
        self.totN = 0

    def accum(self, mean, stddev, N):
        """Fold in a group of N samples with the given mean and stddev.

        stddev is ignored (and may be None) when N is 1; a group with
        N == 0 changes nothing.
        """
        if N == 0:
            # Nothing to change
            return
        if N == 1:
            # stddev will be None
            this_sumX = mean
            this_sumXsq = mean * mean
        else:
            this_sumX = mean * N
            variance = stddev * stddev
            # Invert the sample-variance formula to recover sum(x^2).
            this_sumXsq = variance * (N - 1) + this_sumX * this_sumX / N
        self.sumX += this_sumX
        self.sumXsq += this_sumXsq
        self.totN += N

    def stats(self):
        """Return (mean, stddev, N) over everything accumulated so far.

        mean and stddev are None when nothing was accumulated; stddev
        is None when there is only a single sample.
        """
        if self.totN == 0:
            return (None, None, 0)
        if self.totN == 1:
            return (self.sumX, None, 1)
        mean = self.sumX / self.totN
        variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
                / (self.totN - 1)
        # Rounding can leave a tiny negative variance when all samples
        # are (nearly) equal; clamp it so that sqrt() cannot fail.
        stddev = math.sqrt(max(variance, 0.0))
        return (mean, stddev, self.totN)

    def __str__(self):
        """Return "mean stddev", printing a missing value as 0."""
        mean, stddev, _ = self.stats()
        if mean is None:
            mean = 0
        if stddev is None:
            stddev = 0
        return "%f %f" % (mean, stddev)
class StatBundle:
    """A bundle of 14 StatAccums, one per statistic written to a .dat
    line.  Each method below feeds one (mean, stddev, N) triple into
    the accumulator at a fixed position."""

    def __init__(self):
        self.stats = [StatAccum() for _ in range(14)]

    def _record(self, slot, mean, stddev, N):
        # Shared body of the per-statistic methods.
        self.stats[slot].accum(mean, stddev, N)

    def dirauth(self, mean, stddev, N):
        self._record(0, mean, stddev, N)

    def relay_all(self, mean, stddev, N):
        self._record(1, mean, stddev, N)

    def relay_fb_boot(self, mean, stddev, N):
        self._record(2, mean, stddev, N)

    def relay_fb_nboot(self, mean, stddev, N):
        self._record(3, mean, stddev, N)

    def relay_nfb_boot(self, mean, stddev, N):
        self._record(4, mean, stddev, N)

    def relay_nfb_nboot(self, mean, stddev, N):
        self._record(5, mean, stddev, N)

    def client_all(self, mean, stddev, N):
        self._record(6, mean, stddev, N)

    def client_circuit_build(self, mean, stddev, N):
        self._record(7, mean, stddev, N)

    def client_boot(self, mean, stddev, N):
        self._record(8, mean, stddev, N)

    def client_nboot(self, mean, stddev, N):
        self._record(9, mean, stddev, N)

    def steady_dirauth(self, mean, stddev, N):
        self._record(10, mean, stddev, N)

    def steady_relay(self, mean, stddev, N):
        self._record(11, mean, stddev, N)

    def steady_client(self, mean, stddev, N):
        self._record(12, mean, stddev, N)

    def steady_relay_perclient(self, mean, stddev, N):
        self._record(13, mean, stddev, N)

    def __str__(self):
        # The 14 "mean stddev" pairs in slot order, separated by spaces.
        return ' '.join('%s' % accum for accum in self.stats)
class LogParser:
    """A class to parse the logfiles output by sim.py."""

    def __init__(self, network_size):
        """Create a parser; network_size scales the first output column."""
        # self.stats is a dict indexed by mode name (like
        # "singlepass_merkle") whose value is a dict indexed
        # by network scale whose value is a StatBundle
        self.stats = dict()
        self.network_size = network_size
        self.curbundle = None
        self.fbbootstrapping = None
        self.steadystate = False
        # Raw strings keep the backslashes for the regex engine without
        # relying on Python passing through unknown string escapes.
        self.startre = re.compile(r'Starting simulation .*?\/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
        self.statperbwre = re.compile(r'(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
        self.statre = re.compile(r'(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*circuit_building_time=([\d\.]+).*N=(\d+)')
        self.mibre = re.compile(r'MiB used')

    def parse_line(self, line):
        """Consume one log line and update the current StatBundle.

        A "Starting simulation" line selects (creating it if needed) the
        bundle for its mode and scale.  A statistics line is added to
        the current bundle.  A "MiB used" line marks the start of the
        steady state.  Any other line is ignored.

        Raises ValueError for a statistics line whose entity type is
        not handled below.
        """
        m = self.startre.search(line)
        if m:
            mode = m.group(1).lower() + "_" + m.group(2).lower()
            scale = m.group(3)
            if mode not in self.stats:
                self.stats[mode] = dict()
            if scale not in self.stats[mode]:
                self.stats[mode][scale] = StatBundle()
            self.curbundle = self.stats[mode][scale]
            self.fbbootstrapping = True
            self.steadystate = False
            return

        circuit_buildings = 0.0
        m = self.statperbwre.search(line)
        if m:
            enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
                    m.group(1, 2, 4, 5, 7, 8)
        else:
            m = self.statre.search(line)
            if m:
                enttype, means, stddevs, circuit_buildings, Ns = \
                        m.group(1, 3, 5, 6, 7)
                meanperbws, stddevperbws = None, None
            else:
                if self.mibre.search(line):
                    # We've reached steady state
                    self.steadystate = True
                # Not a statistics line: nothing more to do
                return

        # Optional groups come back as None when absent
        mean = float(means)
        stddev = float(stddevs) if stddevs else None
        meanperbw = float(meanperbws) if meanperbws else None
        stddevperbw = float(stddevperbws) if stddevperbws else None
        if circuit_buildings:
            circuit_buildings = float(circuit_buildings)
        N = int(Ns)

        if enttype == 'Dirauths':
            self.curbundle.dirauth(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_dirauth(mean, None, 1)
        elif enttype == 'Relays':
            self.curbundle.relay_all(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_relay(mean, None, 1)
            # Remembered for the per-client figure computed when the
            # 'Clients' line is seen.
            self.totrelaybytes = mean * N
        elif enttype == 'Relays(FB)':
            # The first fallback line after a simulation start is
            # counted as bootstrapping, every later one as not.
            if self.fbbootstrapping:
                self.curbundle.relay_fb_boot(meanperbw, stddevperbw, N)
                self.fbbootstrapping = False
            else:
                self.curbundle.relay_fb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(B)':
            self.curbundle.relay_nfb_boot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(NB)':
            self.curbundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Clients':
            self.curbundle.client_all(mean, stddev, N)
            # NOTE(review): the stddev passed here is the one parsed
            # for bytes, not for the circuit building time -- confirm
            # that this is intended.
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
            if self.steadystate:
                self.curbundle.steady_client(mean, None, 1)
                self.curbundle.steady_relay_perclient(self.totrelaybytes / N, None, 1)
        elif enttype == 'Clients(B)':
            self.curbundle.client_boot(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
        elif enttype == 'Clients(NB)':
            self.curbundle.client_nboot(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
        else:
            raise ValueError('Unknown entity type "%s"' % enttype)

    def write_output(self):
        """Write one "<mode>.dat" file per mode seen in the logs.

        Each line holds the scale multiplied by network_size followed
        by the StatBundle for that scale; lines are ordered by the
        numeric value of the scale.
        """
        for mode, by_scale in self.stats.items():
            # The with block closes the file; no explicit close needed
            with open("%s.dat" % mode, "w") as datout:
                # Scales are kept as strings, so sort them as numbers
                for scale in sorted(by_scale, key=float):
                    datout.write("%s %s\n" % \
                            (self.network_size*float(scale), by_scale[scale]))
if __name__ == '__main__':
    # Work out the reference network size, then feed every input line
    # (files named on the command line, or stdin) through the parser.
    bandwidth_file = os.getenv('BW_FILE')
    if os.getenv('BW_ALGO') == "komlo":
        # keep the original assumption
        network_size = 6500
    else:
        bw_parser = BandwidthParser(bw_file=bandwidth_file)
        network_size = bw_parser.get_relay_num()

    logparser = LogParser(network_size)
    for logline in fileinput.input():
        logparser.parse_line(logline)
    logparser.write_output()

51
analysis/plotbandwidth.py Executable file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
from bwparser import BandwidthParser
# Plot the bandwidth files using gnuplot
if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: outputdir\n")
        sys.exit(1)

    # set some defaults
    bandwidth_file = os.getenv('BW_FILE')
    output_dir = sys.argv[1]
    output_file = output_dir + "/bw_file.pdf"

    # calculate invariants
    bw_parser = BandwidthParser(bw_file=bandwidth_file)
    network_size = bw_parser.get_relay_num()

    # Create dat files and gpcode
    gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s'
set title 'Bandwidth distribution'
set key out
set xlabel "Bandwidth (kB/s)"
set ylabel "Number of relays"
set xtics 100000
plot """ % (output_file)

    for bw_file in bw_parser.get_files():
        bandwidths = BandwidthParser(bw_file=bw_file).get_distribution()

        # One "<index> <bandwidth>" line per entry of the distribution
        with open("%s.dat" % bw_file, "w") as datout:
            for rank, bandwidth in enumerate(bandwidths):
                datout.write("%s %s\n" % (rank, bandwidth))

        # The plot title is the first three dash-separated parts of
        # the file name.
        name_parts = bw_file.split("-")
        date_string = name_parts[0] + "-" + name_parts[1] + "-" + name_parts[2]
        gpcode += "'%s.dat' using 2:1 with lines title '%s'," % (bw_file, date_string)

    gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
    gp.communicate(gpcode.encode('ascii'))

116
analysis/plotdats.py Executable file
View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python3
import os
import math
import subprocess
import analytical
from bwparser import JansenBandwidthParser
# Plot the dat files generated by parselogs.py using gnuplot
if __name__ == '__main__':
    # sensible defaults
    bandwidth_file = os.getenv('BW_FILE')
    bw_parser = None
    if os.getenv('BW_ALGO') == "komlo":
        # keep the original assumption
        network_size = 6500
    else:
        bw_parser = JansenBandwidthParser(bw_file=bandwidth_file)
        network_size = bw_parser.get_relay_num()

    # The analytical functions come from analytical.py, already
    # expressed in terms of x.
    relay_perclient_analyticals = analytical.calculate_relay_analyticals()
    relay_analyticals = {}
    for mode, formula in relay_perclient_analyticals.items():
        relay_analyticals[mode] = '(2500000/%s)*(%s)' % (network_size, formula)
    client_analyticals = analytical.calculate_client_analyticals()

    # (output file stem, plot title, .dat column of the mean,
    #  draw error bars?, analytical curves or None)
    plots = [
        ('dirauth', 'Directory authorities total bytes each', 2, False, None),
        ('relay', 'Relay total bytes each', 4, False, None),
        ('relay_bf', 'Bootstrapping fallback relays bytes per bw', 6, True, None),
        ('relay_f', 'Non-bootstrapping fallback relays bytes per bw', 8, True, None),
        ('relay_b', 'Bootstrapping normal relays bytes per bw', 10, True, None),
        ('relay_n', 'Non-bootstrapping normal relays bytes per bw', 12, True, None),
        ('client', 'Client total bytes each', 14, False, None),
        ('client_c', 'Client circuit building times', 16, True, None),
        ('client_b', 'Bootstrapping client total bytes', 18, True, None),
        ('client_n', 'Non-bootstrapping client total bytes', 20, True, None),
        ('dirauth_ss', 'Directory authority total bytes each', 22, True, None),
        ('relay_ss', 'Relay total bytes each', 24, True, relay_analyticals),
        ('client_ss', 'Client total bytes each', 26, True, client_analyticals),
        ('relay_perclient_ss', 'Relay total bytes per client', 28, True, relay_perclient_analyticals),
        ('relay_ss_wide', 'Relay total bytes each', 24, True, relay_analyticals),
        ('client_ss_wide', 'Client total bytes each', 26, True, client_analyticals),
        ('relay_perclient_ss_wide', 'Relay total bytes per client', 28, True, relay_perclient_analyticals),
    ]

    # (.dat file stem, key label, gnuplot line style)
    dats = [
        ('vanilla_none', 'Vanilla', 1),
        ('singlepass_merkle', 'Sing(M)', 2),
        ('telescoping_merkle', 'Tele(M)', 3),
        ('singlepass_threshsig', 'Sing(T)', 4),
        ('telescoping_threshsig', 'Tele(T)', 5),
    ]

    for filename, title, col, errbars, analyticals in plots:
        if analyticals is None:
            analyticals = dict()

        # The "_wide" plots use log axes out to 100x the network size
        if filename == 'relay_ss_wide':
            ranges = 'set xrange [300:%s]\nset logscale xy\nset yrange [10000000:]' % (network_size*100)
        elif filename.endswith('_wide'):
            ranges = "set xrange [300:%s]\nset logscale xy\nset yrange [10000:]" % (network_size*100)
        else:
            ranges = "set xrange [0:2200]\nset yrange [0:]"

        gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s.pdf'
set title '%s'
%s
set key out
set rmargin 17
set arrow from %s, graph 0 to %s, graph 1 nohead lc 0 lw 2
set xlabel "Number of relays"
set style line 1 lw 2 lc 1 pt 1
set style line 2 lw 2 lc 2 pt 1
set style line 3 lw 2 lc 3 pt 1
set style line 4 lw 2 lc 4 pt 1
set style line 5 lw 2 lc 5 pt 1
set style line 10 lw 2 lc 0 dt 2
set style line 11 lw 2 lc 1 dt 2
set style line 12 lw 2 lc 2 dt 2
set style line 13 lw 2 lc 3 dt 2
set style line 14 lw 2 lc 4 dt 2
set style line 15 lw 2 lc 5 dt 2
plot """ % (filename, title, ranges, network_size, network_size)

        # Collect the comma-separated items of the plot command
        series = []
        for datname, label, style in dats:
            series.append("'%s.dat' using 1:%d with lines ls %d title '%s'" %
                          (datname, col, style, label))
            if errbars:
                series.append("'%s.dat' using 1:%d:%d with errorbars ls %d notitle" %
                              (datname, col, col+1, style))
            if datname in analyticals:
                series.append("%s ls %d notitle" %
                              (analyticals[datname], style+10))
        if analyticals:
            # Constant -100 curve whose only purpose is the key entry
            series.append("-100 ls 10 title 'Analytical'")
        gpcode += ", ".join(series)

        gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
        gp.communicate(gpcode.encode('ascii'))

51
analysis/plotdist.py Executable file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
from bwparser import BandwidthParser, JansenBandwidthParser
# Plot the bandwidth files using gnuplot
if __name__ == '__main__':
    # Exactly one argument is expected: the directory receiving the PDF.
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: outputdir\n")
        sys.exit(1)

    # set some defaults
    bandwidth_file = os.getenv('BW_FILE')
    output_dir = sys.argv[1]
    output_file = os.path.join(output_dir, "bw_dist.pdf")
    # The intermediate data file is written next to the bandwidth file.
    dat_file = "%s-jansen.dat" % bandwidth_file

    # calculate invariants
    bw_parser = BandwidthParser(bw_file=bandwidth_file)
    # NOTE(review): true_avg is computed but never used below; kept because
    # the parser call may have side effects not visible here -- confirm.
    true_avg = bw_parser.get_average()
    jansen_parser = JansenBandwidthParser(bw_file=bandwidth_file)

    # Create dat file: one "<scale> <average>" line for each scale
    # 0.01, 0.02, ..., 1.00.  The file is closed by the with-statement.
    with open(dat_file, "w") as datout:
        for i in range(1, 101):
            netscale = i/100
            jansen_parser.set_scale(netscale)
            datout.write("%s %s\n" % (netscale, jansen_parser.get_average()))

    # Build plot.  The arrow draws a headless vertical marker at scale 1,
    # spanning the full graph height.  The previous "to 1" form left the end
    # point's y coordinate at its default of 0 and drew an arrow head.
    gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s'
set title 'Generated Distribution for %s'
set key out
set arrow from 1, graph 0 to 1, graph 1 nohead
set xlabel "Scale"
set ylabel "Average Bandwidth"
set xtics 0.1
plot """ % (output_file, bandwidth_file)
    gpcode += "'%s' using 1:2 with lines title 'Bandwidth'" % dat_file

    # Feed the script to gnuplot on stdin and wait for it to finish.
    gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
    gp.communicate(gpcode.encode('ascii'))

132
analysis/plotonionperf.py Executable file
View File

@@ -0,0 +1,132 @@
#!/usr/bin/env python3
import os
import sys
import csv
import fileinput
import subprocess
def parse_onionperf_file(file_name):
    """Return the average `md` value of completed circuits per source.

    The CSV file is read from the ``datasets`` directory next to this script
    (an absolute ``file_name`` is used as-is).  Lines starting with ``#`` are
    treated as comments.  A row counts as a completed circuit when its
    ``position`` column is 3, i.e. the exit relay has been reached.

    Args:
        file_name: name of the CSV file; it must provide the columns
            ``source``, ``position`` and ``md``.

    Returns:
        A dict mapping each source to the mean of the ``md`` values of its
        position-3 rows, in order of first appearance in the file.  Sources
        without any position-3 row are omitted; rows with an empty or
        missing source are ignored.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a required column is missing from the header.
        ValueError: if ``position`` or ``md`` cannot be parsed as a number.
    """
    # Anchor on the script's absolute directory so the lookup also works
    # when __file__ carries no directory component.
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        "datasets", file_name)

    # source -> [number of completed circuits, sum of their md values].
    # Every source is registered on first sight so the result keeps file
    # order, but the file is read only once instead of once per source.
    totals = {}
    with open(path, 'r') as f:
        # open the file rows as dicts, skip the comments
        reader = csv.DictReader(
            (line for line in f if not line.startswith('#')), delimiter=',')
        for row in reader:
            source = row['source']
            if not source:
                continue
            acc = totals.setdefault(source, [0, 0])
            # we have completed a circuit after reaching the exit relay
            if int(row['position']) == 3:
                acc[0] += 1
                acc[1] += float(row['md'])

    # calculate average, skipping sources that never completed a circuit
    return {source: total / count
            for source, (count, total) in totals.items() if count != 0}
def fetch_sources(file_name):
    """Return the distinct sources named in an OnionPerf CSV file.

    The file is read from the ``datasets`` directory next to this script (an
    absolute ``file_name`` is used as-is).  Lines starting with ``#`` are
    treated as comments.

    Args:
        file_name: name of the CSV file; it must provide a ``source`` column.

    Returns:
        A list of the non-empty ``source`` values, without duplicates, in
        order of first appearance.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if the header has no ``source`` column.
    """
    # Anchor on the script's absolute directory so the lookup also works
    # when __file__ carries no directory component.
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        "datasets", file_name)
    with open(path, 'r') as f:
        # open the file rows as dicts, skip the comments
        reader = csv.DictReader(
            (line for line in f if not line.startswith('#')), delimiter=',')
        # dict keys give order-preserving de-duplication; the generator is
        # consumed here, while the file is still open.
        unique = dict.fromkeys(
            row['source'] for row in reader if row['source'])
    return list(unique)
# Plot the relative circuit building times using gnuplot
if __name__ == '__main__':
    # set some defaults
    file_name = os.getenv('CB_FILE')
    if not file_name:
        # Without a file name the dataset path cannot be built at all.
        sys.stderr.write("CB_FILE environment variable must be set\n")
        sys.exit(1)

    # fetch average times from onion performance file
    avg_times = parse_onionperf_file(file_name)

    # fetch simulator times
    # Only the files that are actually needed are read, so unrelated .dat
    # files in the working directory cannot break the parsing.
    mode_files = {
        'tele-thresh': 'telescoping_threshsig.dat',
        'tele-merkle': 'telescoping_merkle.dat',
        'singlep-thresh': 'singlepass_threshsig.dat',
        'singlep-merkle': 'singlepass_merkle.dat',
    }
    sim_times = {}
    for dat in ['vanilla_none.dat'] + list(mode_files.values()):
        with open(dat, 'r') as f:
            for line in f:
                # The 16th space-separated field is used; the value of the
                # last line of the file wins.
                sim_times[dat] = float(line.split(" ")[15])
        if dat not in sim_times:
            sys.stderr.write("%s contains no data\n" % dat)
            sys.exit(1)

    # vanilla is base
    base = sim_times['vanilla_none.dat']

    # calculate ratios
    ratios = {label: sim_times[dat] / base
              for label, dat in mode_files.items()}

    # calculate relative times
    # Series are grouped per source instead of being matched by substring
    # later, so a source whose name is a prefix of another one does not
    # pick up the other source's data.
    relative_times = {}
    for source, avg in avg_times.items():
        series = {source + "-vanilla": avg}
        for label, ratio in ratios.items():
            series[source + "-" + label] = avg * ratio
        relative_times[source] = series

    # Largest value over all sources, as used by the plot script below.
    overall_max = max(
        (value for series in relative_times.values()
         for value in series.values()),
        default=None)

    for source in fetch_sources(file_name):
        series = relative_times.get(source, {})

        # Create dat file: all values of this source on one line, each
        # followed by a space.
        with open("client_c_relative-%s.dat" % source, "w") as datout:
            datout.write("".join("%s " % value for value in series.values()))

        if not series:
            # No completed circuits for this source: an empty "plot" command
            # would only make gnuplot fail.
            continue

        # Build plot
        output_file = "client_c_relative-%s.pdf" % source
        # NOTE(review): "set arrow from <max>" gives neither a y coordinate
        # nor an end point; its intent is unclear, so it is left unchanged.
        gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s'
set title 'Relative circuit building times'
set arrow from %s
set grid y
set key out
unset xtics
set ylabel 'Relative times (ms)'
plot """ % (output_file, overall_max)
        # One point series per column of the dat file, in writing order.
        gpcode += ", ".join(
            "'client_c_relative-%s.dat' using 0:%d with points title '%s'" %
            (source, col, key)
            for col, key in enumerate(series, start=1))

        # Feed the script to gnuplot on stdin and wait for it to finish.
        gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
        gp.communicate(gpcode.encode('ascii'))