1
0
Fork 0
walkingonions-boosted/analysis/parselogs.py

282 lines
9.5 KiB
Python
Executable File

#!/usr/bin/env python3
# Parse the log files output by the simulator to get the overall average
# and stddev of total bytes sent+received per epoch for each of:
# - dirauths
# - total relays
# - bootstrapping fallback relays per bw
# - non-bootstrapping fallback relays per bw
# - bootstrapping non-fallback relays per bw
# - non-bootstrapping non-fallback relays per bw
# - total clients
# - bootstrapping clients
# - non-bootstrapping clients
# - steady-state dirauths (skipping the first epoch in which all clients
# are bootstrapping)
# - steady-state relays (as above)
# - steady-state clients (as above)
# - steady-state total relay traffic per client (as above)
# Pass the names of the log files as command-line arguments, or the log
# files themselves on stdin.
# The output will be five files named:
# - vanilla_none.dat
# - telescoping_merkle.dat
# - telescoping_threshsig.dat
# - singlepass_merkle.dat
# - singlepass_threshsig.dat
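# Each file will contain one line for every network scale seen in the
# logs. The line will consist of 29 fields:
# - the network size (float): the network scale from the log multiplied
# by the number of relays in the full-size network
# - the mean and stddev (two floats) for each of 14 classes: the 13
# classes listed above, in the above order, plus the client circuit
# building time, which is written between "total clients" and
# "bootstrapping clients"; for the steady states, the stats are computed
# over the per-epoch averages, while for the others, the stats are
# computed on a per-entity basis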
import math
import re
import os
import fileinput
from bwparser import BandwidthParser
# The simulator modes; each name matches the base name of one of the
# output .dat files listed in the header comment.
modes = [
    "vanilla_none",
    "telescoping_merkle",
    "telescoping_threshsig",
    "singlepass_merkle",
    "singlepass_threshsig",
]
class StatAccum:
    """Accumulate (mean, stddev, N) triples to compute an overall (mean,
    stddev, N).

    Internally only three running totals are kept: the sum of the
    samples, the sum of their squares, and the number of samples.  Each
    triple passed to accum() is converted back into those totals.
    """

    def __init__(self):
        self.sumX = 0      # running sum of all samples
        self.sumXsq = 0    # running sum of the squares of all samples
        self.totN = 0      # total number of samples seen so far

    def accum(self, mean, stddev, N):
        """Fold the summary of one group of samples into the totals.

        mean   -- mean of the group
        stddev -- stddev of the group; it is inverted with an (N-1)
                  denominator, i.e. treated as a sample stddev.  It is
                  not used (and may be None) when N is 0 or 1.
        N      -- number of samples in the group
        """
        if N == 0:
            # Nothing to change
            return
        if N == 1:
            # stddev will be None
            this_sumX = mean
            this_sumXsq = mean * mean
        else:
            this_sumX = mean * N
            variance = stddev * stddev
            # Invert variance = (sumXsq - sumX^2 / N) / (N - 1)
            this_sumXsq = variance * (N - 1) + this_sumX * this_sumX / N
        self.sumX += this_sumX
        self.sumXsq += this_sumXsq
        self.totN += N

    def stats(self):
        """Return the overall (mean, stddev, N) of everything accumulated.

        With no samples the result is (None, None, 0); with exactly one
        sample the stddev is None.
        """
        if self.totN == 0:
            return (None, None, 0)
        if self.totN == 1:
            return (self.sumX, None, 1)
        mean = self.sumX / self.totN
        variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
            / (self.totN - 1)
        # The subtraction above cancels two nearly equal floats; for
        # (almost) identical samples the rounding error can leave a tiny
        # negative value, which math.sqrt would reject.  Clamp it at 0.
        stddev = math.sqrt(max(variance, 0.0))
        return (mean, stddev, self.totN)

    def __str__(self):
        """Format as "mean stddev"; a missing value is printed as 0."""
        mean, stddev, N = self.stats()
        if mean is None:
            mean = 0
        if stddev is None:
            stddev = 0
        return "%f %f" % (mean, stddev)
class StatBundle:
    """A fixed set of 14 StatAccums, one per tracked entity class.

    Every public method below feeds a (mean, stddev, N) triple to the
    accumulator in one particular slot; the slot order is also the order
    in which the accumulators are written out by __str__.
    """

    def __init__(self):
        self.stats = [StatAccum() for _ in range(14)]

    def _feed(self, slot, mean, stddev, N):
        """Pass one (mean, stddev, N) triple to the accumulator in slot."""
        self.stats[slot].accum(mean, stddev, N)

    def dirauth(self, mean, stddev, N):
        self._feed(0, mean, stddev, N)

    def relay_all(self, mean, stddev, N):
        self._feed(1, mean, stddev, N)

    def relay_fb_boot(self, mean, stddev, N):
        self._feed(2, mean, stddev, N)

    def relay_fb_nboot(self, mean, stddev, N):
        self._feed(3, mean, stddev, N)

    def relay_nfb_boot(self, mean, stddev, N):
        self._feed(4, mean, stddev, N)

    def relay_nfb_nboot(self, mean, stddev, N):
        self._feed(5, mean, stddev, N)

    def client_all(self, mean, stddev, N):
        self._feed(6, mean, stddev, N)

    def client_circuit_build(self, mean, stddev, N):
        self._feed(7, mean, stddev, N)

    def client_boot(self, mean, stddev, N):
        self._feed(8, mean, stddev, N)

    def client_nboot(self, mean, stddev, N):
        self._feed(9, mean, stddev, N)

    def steady_dirauth(self, mean, stddev, N):
        self._feed(10, mean, stddev, N)

    def steady_relay(self, mean, stddev, N):
        self._feed(11, mean, stddev, N)

    def steady_client(self, mean, stddev, N):
        self._feed(12, mean, stddev, N)

    def steady_relay_perclient(self, mean, stddev, N):
        self._feed(13, mean, stddev, N)

    def __str__(self):
        # The 14 accumulators in slot order, separated by single spaces
        return ' '.join(str(self.stats[slot]) for slot in range(14))
class LogParser:
    """A class to parse the logfiles output by sim.py.

    Feed the log to parse_line() one line at a time, then call
    write_output() to write one "<mode>.dat" file per mode seen.
    """

    def __init__(self, network_size):
        """network_size is the factor each logged network scale is
        multiplied by when the output files are written."""
        # self.stats is a dict indexed by mode name (like
        # "singlepass_merkle") whose value is a dict indexed
        # by network scale whose value is a StatBundle
        self.stats = dict()
        self.network_size = network_size
        # The StatBundle of the simulation currently being parsed
        self.curbundle = None
        # True until the first "Relays(FB)" line of a simulation is seen
        self.fbbootstrapping = None
        # Set once a "MiB used" line has been seen in this simulation
        self.steadystate = False
        # mean * N of the most recent "Relays" line in the current
        # simulation, or None if no such line has been seen yet
        self.totrelaybytes = None
        # Raw strings: the patterns contain backslash sequences that are
        # not valid Python string escapes
        self.startre = re.compile(r'Starting simulation .*?\/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
        self.statperbwre = re.compile(r'(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
        self.statre = re.compile(r'(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*circuit_building_time=([\d\.]+).*N=(\d+)')
        self.mibre = re.compile(r'MiB used')

    @staticmethod
    def _opt_float(text):
        """Convert a regex group to float; an absent/empty group gives None."""
        return float(text) if text else None

    def parse_line(self, line):
        """Parse one log line and update the statistics accordingly.

        A "Starting simulation" line selects (creating it if needed) the
        StatBundle for its mode and scale.  A "MiB used" line marks the
        start of the steady state.  Statistics lines are added to the
        current bundle.  Any other line is ignored.

        Raises ValueError if a statistics line names an unknown entity
        type.
        """
        m = self.startre.search(line)
        if m:
            mode = m.group(1).lower() + "_" + m.group(2).lower()
            scale = m.group(3)
            if mode not in self.stats:
                self.stats[mode] = dict()
            if scale not in self.stats[mode]:
                self.stats[mode][scale] = StatBundle()
            self.curbundle = self.stats[mode][scale]
            self.fbbootstrapping = True
            self.steadystate = False
            # Do not carry the relay total over from a previous simulation
            self.totrelaybytes = None
            return

        # Only the plain (non per-bandwidth) lines carry a circuit
        # building time; the per-bandwidth lines keep this default
        circuit_buildings = 0.0
        m = self.statperbwre.search(line)
        if m:
            enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
                m.group(1, 2, 4, 5, 7, 8)
        else:
            m = self.statre.search(line)
            if m:
                enttype, means, stddevs, circuit_buildings, Ns = \
                    m.group(1, 3, 5, 6, 7)
                meanperbws, stddevperbws = None, None
            else:
                if self.mibre.search(line):
                    # We've reached steady state
                    self.steadystate = True
                # Not a statistics line: nothing more to do
                return

        mean = float(means)
        stddev = self._opt_float(stddevs)
        meanperbw = self._opt_float(meanperbws)
        stddevperbw = self._opt_float(stddevperbws)
        if circuit_buildings:
            circuit_buildings = float(circuit_buildings)
        N = int(Ns)

        # NOTE(review): the client_circuit_build() calls below pass the
        # stddev of the *bytes* value, because the log pattern captures
        # no stddev for circuit_building_time, and all three client line
        # types feed the same accumulator.  Confirm that this is the
        # intended statistic.
        if enttype == 'Dirauths':
            self.curbundle.dirauth(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_dirauth(mean, None, 1)
        elif enttype == 'Relays':
            self.curbundle.relay_all(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_relay(mean, None, 1)
            self.totrelaybytes = mean * N
        elif enttype == 'Relays(FB)':
            # Only the first Relays(FB) line of a simulation counts as
            # bootstrapping
            if self.fbbootstrapping:
                self.curbundle.relay_fb_boot(meanperbw, stddevperbw, N)
                self.fbbootstrapping = False
            else:
                self.curbundle.relay_fb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(B)':
            self.curbundle.relay_nfb_boot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(NB)':
            self.curbundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Clients':
            self.curbundle.client_all(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
            if self.steadystate:
                self.curbundle.steady_client(mean, None, 1)
                # The per-client relay traffic needs a relay total from
                # this simulation and at least one client
                if self.totrelaybytes is not None and N > 0:
                    self.curbundle.steady_relay_perclient(
                        self.totrelaybytes / N, None, 1)
        elif enttype == 'Clients(B)':
            self.curbundle.client_boot(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
        elif enttype == 'Clients(NB)':
            self.curbundle.client_nboot(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
        else:
            raise ValueError('Unknown entity type "%s"' % enttype)

    def write_output(self):
        """Write "<mode>.dat" in the current directory for every mode
        seen, one line per scale, ordered by numeric scale."""
        for mode, bundles in self.stats.items():
            # The with statement closes the file
            with open("%s.dat" % mode, "w") as datout:
                # The scales are strings; sort them by numeric value
                for scale in sorted(bundles, key=float):
                    datout.write("%s %s\n" %
                                 (self.network_size * float(scale),
                                  bundles[scale]))
if __name__ == '__main__':
    # Pick the full-size network relay count: unless the BW_ALGO
    # environment variable is "komlo", ask the bandwidth file named by
    # BW_FILE; for "komlo", keep the original assumption of 6500.
    if os.getenv('BW_ALGO') == "komlo":
        relay_count = 6500
    else:
        relay_count = BandwidthParser(
            bw_file=os.getenv('BW_FILE')).get_relay_num()

    # Feed every line from the files named on the command line (or from
    # stdin) to the parser, then write the .dat files.
    parser = LogParser(relay_count)
    for logline in fileinput.input():
        parser.parse_line(logline)
    parser.write_output()