1
0

Initial commit

This commit is contained in:
Ivaylo Ivanov 2022-03-17 17:05:34 +01:00
commit eac77e615d
72 changed files with 44950 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
docker
logdir
!results/**/*.pdf
!results/**/*.log
__pycache__
*.svg

14
Dockerfile.in Normal file
View File

@ -0,0 +1,14 @@
# Image for running the Walking Onions simulator and its analysis scripts.
FROM docker.io/ubuntu:18.04
# apt-get is the stable scripting interface (plain `apt` warns that its CLI
# is not stable).  The non-interactive frontend keeps package configuration
# from prompting during the build, and the package lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-pip python3-dev python3-sympy build-essential screen sudo psmisc gnuplot-nox htop && rm -rf /var/lib/apt/lists/*
# Python dependencies are pinned to exact versions.
RUN pip3 install --no-cache-dir merklelib==1.0 pynacl==1.3.0 numpy==1.19.5
WORKDIR /home/walkingo
RUN mkdir analysis && mkdir datasets
# Files below are staged into the build context by `make build`.
COPY wo_docker_start /usr/bin/
COPY .screenrc ./
COPY *.py ./
COPY datasets/. datasets/
ENV SHELL=/bin/bash
CMD /usr/bin/wo_docker_start

20
LICENSE Normal file
View File

@ -0,0 +1,20 @@
Copyright 2019, 2020 Ian Goldberg and Chelsea Komlo
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

69
Makefile Normal file
View File

@ -0,0 +1,69 @@
####################################################
# ------------------------------------------------------------------------
# Minimalistic Makefile
#
# Thin task runner around docker-compose.  None of the targets below
# corresponds to a file that make should track.
# ------------------------------------------------------------------------
####################################################
SHELL = /bin/sh

# uid/gid of the invoking user, exported for docker-compose
CURRENT_UID := $(shell id -u)
CURRENT_GID := $(shell id -g)
# bandwidth file to use for the simulation. Has effect when the bandwidth distribution algorithm is Jansen
BW_FILE := 2021-12-16-16-41-15-bandwidth-lower
# file, containing circuit building times to evaluate
CB_FILE := onionperf-buildtimes-2021-12-13-2021-12-19.csv
# algorithm for bandwidth distribution. Anything other than "komlo" will default to the Jansen algorithm
BW_ALGO := jansen

export CURRENT_UID
export CURRENT_GID
export BW_FILE
export CB_FILE
export BW_ALGO

# Every target is a command, not a file: without this declaration a stray
# file or directory named e.g. "build" or "docs" would make the target look
# up to date and silently skip its recipe.
.PHONY: all attach build clean debug docs plots run uml

# all, debug and plots depend on their prerequisites running strictly in
# the order clean -> build -> run; forbid parallel execution so that
# `make -j` cannot reorder them.
.NOTPARALLEL:

all: clean build run

# Re-attach to the screen session inside the running container.
attach:
	docker-compose exec -u walkingo walking_onions screen -rd

# Stage the docker build context under docker/ and build the image.
build:
	mkdir -p docker/datasets
	cp -av Dockerfile.in docker/Dockerfile
	cp -av wo_docker_start.in docker/wo_docker_start
	chmod 755 docker/wo_docker_start
	cp -av *.py docker/
	cp -av analysis/*.py docker/
	cp -av datasets/* docker/datasets/
	echo "deflogin off" > docker/.screenrc
	docker-compose build

# Tear down the container (best effort: it may not exist yet) and remove
# the staged build context.
clean:
	docker-compose down || true
	rm -rf docker

# Rebuild, start, and open a shell in the container.
debug: clean build run
	docker-compose exec -u walkingo walking_onions bash

# Serve the pydoc pages on port 8080 and open them in a browser.
docs:
	python3 -m pydoc -p 8080 -b simulator

# Parse the logs in logdir and produce all plots inside the container.
plots: clean build run
	docker-compose exec -u walkingo walking_onions /bin/bash -c "cd logdir && ../parselogs.py *.log"
	docker-compose exec -u walkingo walking_onions /bin/bash -c "cd logdir && ../plotdats.py"
	docker-compose exec -u walkingo walking_onions /bin/bash -c "cd logdir && ../plotonionperf.py"
	docker-compose exec -u walkingo walking_onions /bin/bash -c "./plotbandwidth.py logdir"
	docker-compose exec -u walkingo walking_onions /bin/bash -c "./plotdist.py logdir"

# Create logdir, seed it with run_sims unless one is already there (so user
# edits survive), and start the container in the background.  The existence
# check runs when the recipe executes rather than when the Makefile is
# parsed.
run:
	mkdir -p logdir
	test -e logdir/run_sims || { cp run_sims.in logdir/run_sims && chmod 755 logdir/run_sims; }
	docker-compose up -d

# Generate UML diagrams of the simulator as SVG.
uml:
	pyreverse -o svg -p simulator .

114
README.md Normal file
View File

@ -0,0 +1,114 @@
# Simulator for Testing Walking Onions Performance (Improved)
This repository contains the simulator used in the paper "[Walking Onions:
Scaling Anonymity Networks while Protecting Users](https://crysp.uwaterloo.ca/software/walkingonions/)", along with several improvements.
This is open-source software, under the [MIT License](LICENSE).
## What is included
In this repository, you will find:
* **README.md**: this file
* **bwparser.py**, **cell.py**, **client.py**, **connection.py**, **dirauth.py**, **msg.py**, **network.py**, **relay.py**, **server.py**, **sim.py**: the source code for the simulator
* **docker-compose.yml**, **Makefile**: scripts to create and run the docker containing the simulator (see below)
* **Dockerfile.in**, **run\_sims.in**, **wo\_docker\_start.in**: templates used by `make` to build the docker image
* **analysis**: a directory containing scripts to analyze the log files produced by the simulator and generate graphs in PDF form. See [Analyzing the results](#analyzing-the-results) below for more information.
* **logs**: a directory containing the logs output by the simulator when _we_ ran it. These are the very logfiles that were processed by the [parselogs.py](analysis/parselogs.py) and [plotdats.py](analysis/plotdats.py) scripts to produce the graphs in the paper. (When you run the simulator yourself, your log files will end up in a directory called **logdir** that will be created by `make run`.)
## tl;dr
* `make`
* Edit the **logdir/run_sims** file to uncomment the simulations you want to run in parallel, noting the memory requirements of each simulation noted in that file.
* `make attach`
* Inside the docker container:
* `logdir/run_sims 1`
* Wait for the simulations to finish
* `make plots`
## Building the simulator
The simulator is written in Python, so you don't strictly have to build it per se. However, for convenience, compatibility, and reproducibility, we provide a docker environment that is all set up so that you can run the simulator.
### A note about user ids
The simulator running in the docker container will write its log files into a directory **logdir** on the host machine via a [bind mount](https://docs.docker.com/storage/bind-mounts/). In order that you (the person running the simulator) can read and analyze those log files outside of the docker, the log files should be owned by your user id (on the host machine).
To accomplish this, when the docker image is run, the **wo\_docker\_start** docker init script will check the user and group ids that own the **logdir** directory, and create the "walkingo" user in the docker with those same user and group ids. That way, when the walkingo user in the docker runs the simulator, it will write files to the **logdir** directory owned by you, and you will be able to easily read them.
### Building the docker image
Run `make build` to create a docker image called `walkingonions`. This image is meant to be run from this same directory.
## Running the simulator
To start the docker container, use the `make run` command. This will do several things:
* Create the **logdir** directory, if it does not already exist. This directory will be visible both inside and outside the docker container.
* Create the **run_sims** script inside the **logdir** directory (if it does not already exist); this is a script you can edit and run to start the simulations.
* Start a docker container named `walkingo_exp`, using the docker image `walkingonions` created above.
The docker container will start **in the background**.
On the host machine (_not_ in the docker container), edit the **logdir/run_sims** script. This script specifies which simulations you want to run. The simulator has three different circuit creation modes (see the paper for details):
* `vanilla`: Vanilla Onion Routing (equivalent to regular Tor)
* `telescoping`: Telescoping Walking Onions
* `singlepass`: Single-Pass Walking Onions
In addition, the two Walking Onions modes each have two possible _SNIP authentication_ modes:
* `threshsig`: Threshold signatures
* `merkle`: Merkle trees
(Vanilla Onion Routing only has `none` for the SNIP authentication mode, as it has no SNIPs.)
For any of the five valid combinations of circuit creation mode and SNIP authentication mode, the simulator can run at a specified _scale_. This is a decimal fraction of a network around the size of today's Tor network: 6,500 relays and 2,500,000 clients.
The **logdir/run_sims** file has (initially commented-out) entries for all five mode combinations and a range of scales from 0.05 to 0.30. Edit that file to uncomment the lines for the simulations you want to run.
The simulations can be configured to run for a certain number of _epochs_. An epoch represents one hour of real time, but the simulator can be much slower than real time, as we will see below. In epoch number 1, the directory authorities start up, and the relays start up and register with the directory authorities. In epoch number 2, the relays bootstrap, and the clients start up. In epoch number 3, the clients bootstrap and start building circuits. The number of epochs specified in the **logdir/run_sims** file is the number of epochs in which circuits are built (default 10). However, the first such epoch (epoch 3) is the one in which all clients are bootstrapping, and so it is not part of the "steady state" behaviour. The scripts in the **analysis** directory thus separate out epoch 3 when computing the steady state, and so each simulation run will contribute 9 epochs' worth of data points. After epoch 3, some number of relays and clients will disconnect from the network each epoch, and some number will connect and bootstrap. The distributions of these numbers were selected to be reflective of the current Tor network (see the paper for details).
**Note**: these simulations can take a lot of time and memory. They only use a single core each, so if you have multiple cores, you can uncomment multiple lines of the **logdir/run_sims** file, but you'll need to keep an eye on your RAM usage. The estimated RAM usage for each simulation is documented in the **logdir/run_sims** file; it ranges (on our machines) from 12 GiB for the smallest 0.05-scale simulations up to 76 GiB for the largest 0.30-scale simulations. Our machines took about 15 hours for each of the smallest simulations, and about 11 days for each of the largest.
Once you have uncommented the simulations you want to run, attach to the docker container with the `make attach` command. The docker container is running `screen`, so you can detach from the docker (_without_ terminating any running simulations) using the `screen` _Ctrl-a d_ command. If you exit the shell in the docker with `exit` or just _Ctrl-d_, and no simulations are running, the `screen` process will exit, and the docker container will terminate.
Once attached to the docker, start the simulations by running (from the walkingo user's home directory) `logdir/run_sims` _`seed`_, where _`seed`_ is a small integer (e.g., 1, 2, 8, 10, something like that) that seeds the random number generator. The intent is that if you run the same simulation with the same seed, you should get identical results out. (It turns out if you use Python 3.5.2 on Ubuntu 16.04, you do _not_ get identical results out, but you do on Python 3.6.9 on Ubuntu 18.04, which is what is installed in the docker image.) For our experiments, we used seeds of 8, 10, 20, 21, 22, and 23. The name of the logfile (e.g., `TELESCOPING_MERKLE_0.200000_10_21.log`) records the mode, the scale, the number of (circuit-building) epochs, and the seed.
When you run the `logdir/run_sims` _`seed`_ command, `screen` will switch to showing you the output of your simulation (or one of them if you started more than one). The output is not important to save (the simulator will save the important information in the log files), but it can help you keep an eye on the simulation's progress. To get back to your command line, use the _Ctrl-a 0_ command to `screen` (that's a zero, not a letter o). From there, as above, use _Ctrl-a d_ to detach from the docker container while leaving the simulations running. You can re-attach to the running container at any time using the `make attach` command.
Once your simulations are complete, you can terminate the docker container by attaching to it, and exiting the shell.
### Analyzing the results
The analysis scripts have two steps:
1. Parse the log files to produce dat files containing statistical data from the log files.
2. Plot the dat files as PDFs.
You can run the analysis scripts in whatever directory you like, but they will put the output dat files and pdfs in the current directory. You're likely to want that directory to be the bind-mounted **logdir** directory, so that you can access the results from the host machine. So run:
```
$ make plots
```
The `parselogs.py` command will parse the log files you give it, and write the dat files to the current directory. The `plotdats.py` command will turn those dat files into PDF graphs using `gnuplot` (which is installed in the docker image).
Note that if you did not run simulations for all five mode combinations, you will be missing the corresponding dat files. `gnuplot` will output warnings that it can't find them when you run `plotdats.py`, but it will graph the data you _do_ have anyway.
Some of the graphs also plot _analytical formulas_. These are computations of what the results _should_ be mathematically, and hopefully your simulation results (taking the error bars into account) do in fact follow the analytical formulas. The formulas themselves can be found in the [analytical.py](analysis/analytical.py) file.
The `plotdats.py` script will produce a number of PDF graphs in the current directory (**logdir** in the above example):
* **relay_ss.pdf**: The average number of bytes per epoch each relay sends or receives (total relay bytes divided by the number of relays). The error bars are on a per-epoch basis. Only data from steady-state epochs (ss) is used.
* **relay_ss_wide.pdf**: A zoomed-out view of the above plot, on a log-log scale, showing the asymptotic behaviour of the analytical formulas.
* **client_ss.pdf**, **client_ss_wide.pdf**: as above, but for client bytes instead of for relay bytes.
The above four PDFs are the important ones, and are the ones presented in the paper. There are a number of others, however:
* **relay_perclient_ss.pdf**, **relay_perclient_ss_wide.pdf**: The total number of bytes sent or received by relays, divided by the number of _clients_ (not relays). The reasoning here is that the number of clients is the largest determiner of the total amount of traffic in the network (since the number of circuits built is proportional to the number of clients). Due to churn, the number of clients and the number of relays each change from epoch to epoch. Since the total number of bytes is largely determined by the number of clients, then the **relay_ss.pdf** plot is showing a value whose numerator is a random variable in the number of clients, and whose denominator is a random variable in the number of relays. On _average_, the ratio of the number of clients to the number of relays is fixed, but since both the numerator and denominator are varying, the error bars are larger. This plot has the number of clients in both the numerator and denominator, so the error bars are much smaller, and show the variance due to relay churn, but not also due to client churn.
* **dirauth_ss.pdf**: The number of bytes sent and received by directory authorities, only counting steady-state epochs.
* **dirauth.pdf**: As above, but for all epochs (there is pretty much no difference for directory authorities, so this graph and the above are very similar).
* **relay.pdf**, **client.pdf**: The total number of bytes sent or received per epoch per relay or client, not only in steady state. The data points are on a per-relay (or per-client) basis, not a per-epoch basis, as above. The error bars are not plotted on this graph because they are not meaningful: different relays are _expected_ to have vastly different results, because they have different roles (fallback vs not), properties (bootstrapping vs not), and bandwidths (higher-bandwidth relays are used by clients with higher probability). Similarly clients can be bootstrapping or not, and bootstrapping clients use much more bandwidth in Vanilla Onion Routing than non-bootstrapping clients. We therefore break up the data into different roles and properties in the graphs below:
* **relay_bf.pdf**, **relay_f.pdf**, **relay_b.pdf**, **relay_n.pdf**: Separate plots for bootstrapping fallback relays, non-bootstrapping fallback relays, bootstrapping non-fallback relays, and non-bootstrapping non-fallback relays. Each plot shows the total number of bytes sent and received per epoch, _divided by_ the relay's bandwidth.
* **client_b.pdf**, **client_n.pdf**: Separate plots for bootstrapping and non-bootstrapping clients. These plots are total bytes sent and received per epoch by clients. (Clients do not have bandwidth weights, so these plots are not normalized to bandwidth like the ones above.)

4
__init__.py Normal file
View File

@ -0,0 +1,4 @@
import sys
# Only when this file is executed directly (not imported): put the empty
# string at the very front of the module search path.
if __name__ == '__main__':
    sys.path[:0] = ['']

18
analysis/README Normal file
View File

@ -0,0 +1,18 @@
The bytecounts.py script produces the formulas that are coded into the
analytical.py script.
The analytical.py script produces the formulas that are coded into the
plotdats.py script.
You shouldn't have to touch either of those, unless the simulator itself
changes.
If you're just plotting the output of simulator logfiles, just do:
$ ./parselogs.py ../path/to/*.log
(the above will generate 5 .dat files)
$ ./plotdats.py
(the above will generate a bunch of .pdf graphs)

218
analysis/analytical.py Executable file
View File

@ -0,0 +1,218 @@
#!/usr/bin/env python3
# Compute analytical formulas for the bytes used per epoch by the
# various modes
import os
import sympy
from bwparser import BandwidthParser
# Choose the relay count that the client population is scaled against.
# BW_FILE and BW_ALGO come from the environment.
bandwidth_file = os.getenv('BW_FILE')
if os.getenv('BW_ALGO') == "komlo":
    # keep the original assumption
    network_size = 6500
else:
    # any other algorithm: take the relay count from the bandwidth file
    bw_parser = BandwidthParser(bw_file=bandwidth_file)
    network_size = bw_parser.get_relay_num()
# Declare the sympy symbols used by the formulas below: model parameters
# (A, R, R_B, R_N, logR, C, C_B, C_N, gamma, circ, P_Delta) followed by one
# symbol per message/cell type.  Each Python name on the left is bound to
# the symbol of the same name in the string on the right, so the two lists
# must stay in the same order.  DirAuthUploadDescMsg appears twice in both
# lists, so the counts still match.
A, R_B, R_N, R, logR, C_B, C_N, C, gamma, circ, P_Delta, \
DirAuthConsensusMsg, DirAuthENDIVEDiffMsg, DirAuthGetConsensusMsg, \
DirAuthGetENDIVEDiffMsg, DirAuthUploadDescMsg, DirAuthENDIVEMsg, \
DirAuthGetENDIVEMsg, DirAuthUploadDescMsg, RelayConsensusMsg, \
RelayDescMsg, RelayGetConsensusMsg, RelayGetDescMsg, \
SinglePassCreateCircuitMsgNotLast, SinglePassCreateCircuitMsgLast, \
SinglePassCreatedCircuitCellLast, SinglePassCreatedCircuitCellMiddle, \
SinglePassCreatedCircuitCellFirst, TelescopingCreateCircuitMsg, \
TelescopingCreatedCircuitCell, TelescopingExtendCircuitCell, \
TelescopingExtendedCircuitCell, VanillaCreateCircuitMsg, \
VanillaCreatedCircuitCell, VanillaExtendCircuitCell, \
VanillaExtendedCircuitCell, DirAuthGetConsensusDiffMsg, \
DirAuthConsensusDiffMsg, RelayGetConsensusDiffMsg, \
RelayConsensusDiffMsg \
= sympy.symbols("""
A, R_B, R_N, R, logR, C_B, C_N, C, gamma, circ, P_Delta,
DirAuthConsensusMsg, DirAuthENDIVEDiffMsg, DirAuthGetConsensusMsg,
DirAuthGetENDIVEDiffMsg, DirAuthUploadDescMsg, DirAuthENDIVEMsg,
DirAuthGetENDIVEMsg, DirAuthUploadDescMsg, RelayConsensusMsg,
RelayDescMsg, RelayGetConsensusMsg, RelayGetDescMsg,
SinglePassCreateCircuitMsgNotLast, SinglePassCreateCircuitMsgLast,
SinglePassCreatedCircuitCellLast, SinglePassCreatedCircuitCellMiddle,
SinglePassCreatedCircuitCellFirst, TelescopingCreateCircuitMsg,
TelescopingCreatedCircuitCell, TelescopingExtendCircuitCell,
TelescopingExtendedCircuitCell, VanillaCreateCircuitMsg,
VanillaCreatedCircuitCell, VanillaExtendCircuitCell,
VanillaExtendedCircuitCell, DirAuthGetConsensusDiffMsg,
DirAuthConsensusDiffMsg, RelayGetConsensusDiffMsg,
RelayConsensusDiffMsg
""")
# Substitutions shared by every mode.  This list is handed to .subs(),
# which processes a list of pairs in the order given, so the order matters:
# each derived symbol (R_N, C_N, circ) is listed before the symbols its
# replacement introduces (R_B, C_B, gamma, C), and after all substitutions
# only R (and logR) remain free.
# NOTE(review): A is presumably the number of directory authorities and
# R_B/C_B the bootstrapping relays/clients -- confirm against the paper.
globalsubs = [
(A , 9),
(R_N , R - R_B),
(R_B , 0.010 * R),
(C_N , C - C_B),
(C_B , 0.16 * C),
(circ , gamma * C),
(gamma , 8.9),
# clients scale linearly with relays: 2500000 clients at network_size relays
(C , 2500000*R/network_size),
(P_Delta, 0.019),
]
# The actual sizes in bytes of each message type were logged by
# uncommenting this line in network.py:
# logging.info("%s size %d", type(self).__name__, sz)
#
# One substitution table per (circuit mode, SNIP authentication mode)
# combination follows.  Each maps a message/cell symbol to its size, either
# a constant or an expression in R / logR.  Entries whose value is built
# with .subs(globalsubs) have P_Delta replaced by its number when the table
# is constructed.

# Single-pass mode, Merkle-tree SNIP authentication.
singlepass_merkle_subs = [
(DirAuthConsensusMsg, 877),
(DirAuthGetConsensusMsg, 41),
(DirAuthGetENDIVEMsg, 38),
(DirAuthGetENDIVEDiffMsg, 42),
(DirAuthENDIVEDiffMsg, (P_Delta * DirAuthENDIVEMsg).subs(globalsubs)),
(DirAuthENDIVEMsg, 274 * R),
(DirAuthUploadDescMsg, 425),
(RelayConsensusMsg, 873),
(RelayDescMsg, 415),
(RelayGetConsensusMsg, 37),
(RelayGetDescMsg, 32),
(SinglePassCreateCircuitMsgLast, 187),
(SinglePassCreateCircuitMsgNotLast, 239),
(SinglePassCreatedCircuitCellFirst, 1426+82*logR),
(SinglePassCreatedCircuitCellMiddle, 903+41*logR),
(SinglePassCreatedCircuitCellLast, 190),
]
# Single-pass mode, threshold-signature SNIP authentication.  Here the
# ENDIVE diff is substituted by the full ENDIVE message.
singlepass_threshsig_subs = [
(DirAuthConsensusMsg, 789),
(DirAuthGetConsensusMsg, 41),
(DirAuthGetENDIVEMsg, 38),
(DirAuthGetENDIVEDiffMsg, 42),
(DirAuthENDIVEDiffMsg, DirAuthENDIVEMsg),
(DirAuthENDIVEMsg, 348*R),
(DirAuthUploadDescMsg, 425),
(RelayConsensusMsg, 784),
(RelayDescMsg, 415),
(RelayGetConsensusMsg, 37),
(RelayGetDescMsg, 32),
(SinglePassCreateCircuitMsgLast, 187),
(SinglePassCreateCircuitMsgNotLast, 239),
(SinglePassCreatedCircuitCellFirst, 1554),
(SinglePassCreatedCircuitCellMiddle, 969),
(SinglePassCreatedCircuitCellLast, 190),
]
# Telescoping mode, Merkle-tree SNIP authentication.
telescoping_merkle_subs = [
(DirAuthConsensusMsg, 877),
(DirAuthGetConsensusMsg, 41),
(DirAuthGetENDIVEMsg, 38),
(DirAuthGetENDIVEDiffMsg, 42),
(DirAuthENDIVEDiffMsg, (P_Delta * DirAuthENDIVEMsg).subs(globalsubs)),
(DirAuthENDIVEMsg, 234 * R),
(DirAuthUploadDescMsg, 372),
(RelayConsensusMsg, 873),
(RelayGetConsensusMsg, 37),
(RelayGetDescMsg, 32),
(RelayDescMsg, 362),
(TelescopingCreateCircuitMsg, 120),
(TelescopingCreatedCircuitCell, 179),
(TelescopingExtendCircuitCell, 122),
(TelescopingExtendedCircuitCell, 493+41*logR),
]
# Telescoping mode, threshold-signature SNIP authentication.  As in the
# single-pass threshsig table, the ENDIVE diff is the full ENDIVE message.
telescoping_threshsig_subs = [
(DirAuthConsensusMsg, 789),
(DirAuthGetConsensusMsg, 41),
(DirAuthGetENDIVEMsg, 38),
(DirAuthGetENDIVEDiffMsg, 42),
(DirAuthENDIVEDiffMsg, DirAuthENDIVEMsg),
(DirAuthENDIVEMsg, 307*R),
(DirAuthUploadDescMsg, 372),
(RelayConsensusMsg, 788),
(RelayGetConsensusMsg, 37),
(RelayGetDescMsg, 32),
(RelayDescMsg, 362),
(TelescopingCreateCircuitMsg, 120),
(TelescopingCreatedCircuitCell, 179),
(TelescopingExtendCircuitCell, 122),
(TelescopingExtendedCircuitCell, 556),
]
# Vanilla onion routing (no SNIPs).  The *DiffMsg and DirAuthConsensusMsg
# entries are listed before RelayConsensusMsg so that the RelayConsensusMsg
# they introduce is itself replaced by 219*R later in the same pass.
vanilla_subs = [
(DirAuthConsensusDiffMsg, (P_Delta * DirAuthConsensusMsg).subs(globalsubs)),
(DirAuthConsensusMsg, RelayConsensusMsg),
(DirAuthGetConsensusDiffMsg, 45),
(DirAuthGetConsensusMsg, 41),
(DirAuthUploadDescMsg, 372),
(RelayConsensusDiffMsg, (P_Delta * RelayConsensusMsg).subs(globalsubs)),
(RelayConsensusMsg, 219*R),
(RelayGetConsensusDiffMsg, 41),
(RelayGetConsensusMsg, 37),
(VanillaCreateCircuitMsg, 116),
(VanillaCreatedCircuitCell, 175),
(VanillaExtendCircuitCell, 157),
(VanillaExtendedCircuitCell, 176),
]
# The formulas were output by bytecounts.py
#
# Each *_totrelay / *_totclient expression is a symbolic total for one
# circuit mode, written as a sum of (population or circuit count) times
# (message sizes involved).  They are left exactly as generated; the
# calculate_* functions below divide them by C and substitute numbers in.
singlepass_totrelay = \
R_N * ( DirAuthConsensusMsg + DirAuthENDIVEDiffMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEDiffMsg + A*DirAuthUploadDescMsg ) \
+ R_B * ( DirAuthConsensusMsg + DirAuthENDIVEMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEMsg + A*DirAuthUploadDescMsg ) \
+ C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
+ circ * ( 3*SinglePassCreateCircuitMsgNotLast + 2*SinglePassCreateCircuitMsgLast + 2*SinglePassCreatedCircuitCellLast + 2*SinglePassCreatedCircuitCellMiddle + SinglePassCreatedCircuitCellFirst + 20 )
singlepass_totclient = \
C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
+ circ * ( SinglePassCreateCircuitMsgNotLast + SinglePassCreatedCircuitCellFirst + 4 )
telescoping_totrelay = \
R_N * ( DirAuthConsensusMsg + DirAuthENDIVEDiffMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEDiffMsg + A*DirAuthUploadDescMsg ) \
+ R_B * ( DirAuthConsensusMsg + DirAuthENDIVEMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEMsg + A*DirAuthUploadDescMsg ) \
+ C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
+ circ * ( 5*TelescopingCreateCircuitMsg + 5*TelescopingCreatedCircuitCell + 4*TelescopingExtendCircuitCell + 4*TelescopingExtendedCircuitCell + 52 )
telescoping_totclient = \
C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
+ circ * ( TelescopingCreateCircuitMsg + TelescopingCreatedCircuitCell + 2*TelescopingExtendCircuitCell + 2*TelescopingExtendedCircuitCell + 20 )
# Vanilla mode splits clients into C_N and C_B because the two groups fetch
# different consensus messages (diff vs. full).
vanilla_totrelay = \
R_N * ( DirAuthConsensusDiffMsg + DirAuthGetConsensusDiffMsg + A*DirAuthUploadDescMsg ) \
+ R_B * ( DirAuthConsensusMsg + DirAuthGetConsensusMsg + A*DirAuthUploadDescMsg ) \
+ C_N * ( RelayConsensusDiffMsg + RelayGetConsensusDiffMsg ) \
+ C_B * ( RelayConsensusMsg + RelayGetConsensusMsg ) \
+ circ * ( 5*VanillaCreateCircuitMsg + 5*VanillaCreatedCircuitCell + 4*VanillaExtendCircuitCell + 4*VanillaExtendedCircuitCell + 52 )
vanilla_totclient = \
C_N * ( RelayConsensusDiffMsg + RelayGetConsensusDiffMsg ) \
+ C_B * ( RelayConsensusMsg + RelayGetConsensusMsg ) \
+ circ * ( VanillaCreateCircuitMsg + VanillaCreatedCircuitCell + 2*VanillaExtendCircuitCell + 2*VanillaExtendedCircuitCell + 20 )
# Copy the output into plotdats.py, replacing 'R' by 'x' and 'logR' by
# 'ceil(log(x)/log(2))'
def calculate_relay_analyticals():
    """Build the per-client relay byte formulas for all five modes.

    For each mode the total relay formula is divided by C, the global and
    the mode-specific substitutions are applied in that order, the result is
    simplified, and finally "logR" and "R" are replaced by
    "ceil(log(x)/log(2))" and "x".

    Returns a dict keyed by mode name ('singlepass_merkle', ...,
    'vanilla_none') in that insertion order.
    """
    modes = (
        ('singlepass_merkle', singlepass_totrelay, singlepass_merkle_subs),
        ('singlepass_threshsig', singlepass_totrelay, singlepass_threshsig_subs),
        ('telescoping_merkle', telescoping_totrelay, telescoping_merkle_subs),
        ('telescoping_threshsig', telescoping_totrelay, telescoping_threshsig_subs),
        ('vanilla_none', vanilla_totrelay, vanilla_subs),
    )
    relay_perclient_analyticals = {}
    for mode_name, total_bytes, size_subs in modes:
        per_client = (total_bytes/C).subs(globalsubs).subs(size_subs).simplify()
        # logR must be handled before R
        per_client = per_client.replace("logR", "ceil(log(x)/log(2))")
        relay_perclient_analyticals[mode_name] = per_client.replace("R", "x")
    return relay_perclient_analyticals
def calculate_client_analyticals():
    """Build the per-client client byte formulas for all five modes.

    For each mode the total client formula is divided by C, the global and
    the mode-specific substitutions are applied in that order, the result is
    simplified, and finally "logR" and "R" are replaced by
    "ceil(log(x)/log(2))" and "x".

    Returns a dict keyed by mode name ('singlepass_merkle', ...,
    'vanilla_none') in that insertion order.
    """
    modes = (
        ('singlepass_merkle', singlepass_totclient, singlepass_merkle_subs),
        ('singlepass_threshsig', singlepass_totclient, singlepass_threshsig_subs),
        ('telescoping_merkle', telescoping_totclient, telescoping_merkle_subs),
        ('telescoping_threshsig', telescoping_totclient, telescoping_threshsig_subs),
        ('vanilla_none', vanilla_totclient, vanilla_subs),
    )
    client_perclient_analyticals = {}
    for mode_name, total_bytes, size_subs in modes:
        per_client = (total_bytes/C).subs(globalsubs).subs(size_subs).simplify()
        # logR must be handled before R
        per_client = per_client.replace("logR", "ceil(log(x)/log(2))")
        client_perclient_analyticals[mode_name] = per_client.replace("R", "x")
    return client_perclient_analyticals
# Script entry point: print the client formulas, then the relay formulas.
if __name__ == '__main__':
    for label, compute in (
        ("Client analyticals: ", calculate_client_analyticals),
        ("Relay analyticals: ", calculate_relay_analyticals),
    ):
        print(label, compute())

250
analysis/bytecounts.py Executable file
View File

@ -0,0 +1,250 @@
#!/usr/bin/env python3
import random # For simulation, not cryptography!
import math
import sys
import os
import logging
import resource
import sympy
import re
import statistics
sys.path.append("..")
import network
import dirauth
import relay
import client
from bwparser import JansenBandwidthParser, KomloBandwidthParser
class BandwidthMeasurer:
def __init__(self, total_numrelays, bw_parser, numdirauths, numrelays, numclients):
self.total_size = total_numrelays
self.bw_parser = bw_parser
# Start some dirauths
self.dirauthaddrs = []
self.dirauths = []
for i in range(numdirauths):
dira = dirauth.DirAuth(i, numdirauths)
self.dirauths.append(dira)
self.dirauthaddrs.append(dira.netaddr)
# Start some relays
self.relays = []
for i in range(numrelays):
self.startrelay()
# The fallback relays are a hardcoded list of a small fraction
# of the relays, used by clients for bootstrapping
numfallbackrelays = 1
fallbackrelays = self.relays[0:1]
for r in fallbackrelays:
r.set_is_fallbackrelay()
network.thenetwork.setfallbackrelays(fallbackrelays)
# Tick the epoch to build the first consensus
network.thenetwork.nextepoch()
# Start some clients
self.clients = []
for i in range(numclients):
self.startclient()
# Throw away all the performance statistics to this point
for d in self.dirauths: d.perfstats.reset()
for r in self.relays: r.perfstats.reset()
# The clients' stats are already at 0, but they have the
# "bootstrapping" flag set, which we want to keep, so we
# won't reset them.
self.allcircs = []
# Tick the epoch to bootstrap the clients
network.thenetwork.nextepoch()
def startrelay(self):
bw = int(self.calculate_relay_bandwidth())
new_relay = relay.Relay(self.dirauthaddrs, bw, 0)
self.relays.append(new_relay)
def calculate_relay_bandwidth(self):
return random.choice(bw_parser.get_distribution())
def stoprelay(self):
self.relays[1].terminate()
del self.relays[1]
def startclient(self):
self.clients.append(client.Client(self.dirauthaddrs))
def stopclient(self):
self.clients[0].terminate()
del self.clients[0]
def buildcircuit(self):
bwm.allcircs.append(bwm.clients[0].channelmgr.new_circuit())
def getstats(self):
# gather stats
totsent = 0
totrecv = 0
totbytes = 0
dirasent = 0
dirarecv = 0
dirabytes = 0
relaysent = 0
relayrecv = 0
relaybytes = 0
clisent = 0
clirecv = 0
clibytes = 0
for d in self.dirauths:
logging.debug("%s", d.perfstats)
dirasent += d.perfstats.bytes_sent
dirarecv += d.perfstats.bytes_received
dirabytes += d.perfstats.bytes_sent + d.perfstats.bytes_received
totsent += dirasent
totrecv += dirarecv
totbytes += dirabytes
for r in self.relays:
logging.debug("%s", r.perfstats)
relaysent += r.perfstats.bytes_sent
relayrecv += r.perfstats.bytes_received
relaybytes += r.perfstats.bytes_sent + r.perfstats.bytes_received
totsent += relaysent
totrecv += relayrecv
totbytes += relaybytes
for c in self.clients:
logging.debug("%s", c.perfstats)
clisent += c.perfstats.bytes_sent
clirecv += c.perfstats.bytes_received
clibytes += c.perfstats.bytes_sent + c.perfstats.bytes_received
totsent += clisent
totrecv += clirecv
totbytes += clibytes
logging.info("DirAuths sent=%s recv=%s bytes=%s" % \
(dirasent, dirarecv, dirabytes))
logging.info("Relays sent=%s recv=%s bytes=%s" % \
(relaysent, relayrecv, relaybytes))
logging.info("Client sent=%s recv=%s bytes=%s" % \
(clisent, clirecv, clibytes))
logging.info("Total sent=%s recv=%s bytes=%s" % \
(totsent, totrecv, totbytes))
# Reset bootstrap flag
for d in self.dirauths: d.perfstats.is_bootstrapping = False
for r in self.relays: r.perfstats.is_bootstrapping = False
for c in self.clients: c.perfstats.is_bootstrapping = False
return (dirabytes, relaybytes, clibytes)
def endepoch(self):
    """Finish the current epoch.

    Closes every circuit in ``self.allcircs`` and empties that list, resets
    the ``perfstats`` of all directory authorities, relays and clients, and
    then advances ``network.thenetwork`` to the next epoch.
    """
    # Close circuits
    for circ in self.allcircs:
        circ.close()
    self.allcircs = []

    # Reset stats
    for group in (self.dirauths, self.relays, self.clients):
        for ent in group:
            ent.perfstats.reset()

    network.thenetwork.nextepoch()
if __name__ == '__main__':
    # Args: womode snipauthmode numrelays randseed
    if len(sys.argv) != 5:
        sys.stderr.write("Usage: womode snipauthmode numrelays randseed\n")
        sys.exit(1)

    _, womode_arg, snipauth_arg, numrelays_arg, randseed_arg = sys.argv

    bandwidth_file = os.getenv('BW_FILE')
    womode = network.WOMode[womode_arg.upper()]
    snipauthmode = network.SNIPAuthMode[snipauth_arg.upper()]
    numrelays = int(numrelays_arg)
    randseed = int(randseed_arg)

    # BW_ALGO=komlo keeps the original assumption; any other value (or none)
    # selects the Jansen parser fed from BW_FILE.
    if os.getenv('BW_ALGO') == "komlo":
        bw_parser = KomloBandwidthParser(sample_size=numrelays)
    else:
        bw_parser = JansenBandwidthParser(bw_file=bandwidth_file,
                                          sample_size=numrelays)
    total_size = bw_parser.get_relay_num()

    # Use symbolic byte counter mode
    network.symbolic_byte_counters = True

    # Seed the PRNG. On Ubuntu 18.04, this in fact makes future calls
    # to (non-cryptographic) random numbers deterministic. On Ubuntu
    # 16.04, it does not.
    random.seed(randseed)

    loglevel = logging.INFO
    # Uncomment to see all the debug messages
    # loglevel = logging.DEBUG
    logging.basicConfig(level=loglevel,
                        format="%(asctime)s:%(levelname)s:%(message)s")
    logging.info("Starting simulation")

    # Set the Walking Onions style to use
    network.thenetwork.set_wo_style(womode, snipauthmode)

    bwm = BandwidthMeasurer(total_size, bw_parser, 9, numrelays, 0)

    # stats maps (R_N, R_B, C_N, C_B, circs) to the tuple returned by
    # getstats() for that scenario.
    stats = dict()

    logging.info("R_N = %d, R_B = 0, C_N = 0, C_B = 0, circs = 0", numrelays)
    stats[(numrelays, 0, 0, 0, 0)] = bwm.getstats()

    # Bootstrap one relay
    bwm.startrelay()
    bwm.endepoch()
    logging.info("R_N = %d, R_B = 1, C_N = 0, C_B = 0, circs = 0", numrelays)
    stats[(numrelays, 1, 0, 0, 0)] = bwm.getstats()

    # Bootstrap one client
    bwm.stoprelay()
    bwm.startclient()
    bwm.endepoch()
    logging.info("R_N = %d, R_B = 0, C_N = 0, C_B = 1, circs = 0", numrelays)
    stats[(numrelays, 0, 0, 1, 0)] = bwm.getstats()

    # No changes, so the client is now not bootstrapping
    bwm.endepoch()
    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 0", numrelays)
    stats[(numrelays, 0, 1, 0, 0)] = bwm.getstats()

    # No more bootstrapping, but build one circuit
    bwm.buildcircuit()
    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 1", numrelays)
    stats[(numrelays, 0, 1, 0, 1)] = bwm.getstats()
    bwm.endepoch()

    # No more bootstrapping, but build two circuits
    bwm.buildcircuit()
    bwm.buildcircuit()
    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 2", numrelays)
    stats[(numrelays, 0, 1, 0, 2)] = bwm.getstats()
    bwm.endepoch()

    # Print the same coefficient breakdown twice: element 1 of each stats
    # tuple is the relay byte count, element 2 the client byte count.
    for heading, col in (('Total relay bytes:', 1), ('Total client bytes:', 2)):
        idle = stats[(numrelays, 0, 0, 0, 0)][col]
        one_client = stats[(numrelays, 0, 1, 0, 0)][col]
        one_circ = stats[(numrelays, 0, 1, 0, 1)][col]
        print("\n")
        print(heading)
        print(' R_N * (', idle/numrelays, ')')
        print('+ R_B * (', stats[(numrelays, 1, 0, 0, 0)][col] - idle, ')')
        print('+ C_N * (', one_client - idle, ')')
        print('+ C_B * (', stats[(numrelays, 0, 0, 1, 0)][col] - idle, ')')
        print('+ circ * (', one_circ - one_client, ')')
        # The cost of the second circuit should match the 'circ' coefficient
        print(' check ', stats[(numrelays, 0, 1, 0, 2)][col] - one_circ)

48
analysis/calcprop.py Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env python3
import os
import sys
import math
import numpy as np
from bwparser import JansenBandwidthParser, KomloBandwidthParser
if __name__ == '__main__':
    # sensible defaults
    bandwidth_file = os.getenv('BW_FILE')

    if len(sys.argv) < 3:
        # Report usage errors on stderr with a non-zero exit status (the
        # bare exit() builtin comes from the site module and exited with 0).
        sys.stderr.write("Usage: calcprop.py scale epoch_count\n")
        sys.exit(1)

    scale = float(sys.argv[1])
    # NOTE(review): the fixed +3 offset is not explained in this file --
    # presumably extra (warm-up) epochs run by the simulator; confirm.
    epoch_count = int(sys.argv[2]) + 3

    # BW_ALGO=komlo uses the Komlo parser sized from the scale; any other
    # value (or none) uses the Jansen parser fed from BW_FILE.
    if os.getenv('BW_ALGO') == "komlo":
        bw_parser = KomloBandwidthParser(sample_size=math.ceil(6500*scale))
    else:
        bw_parser = JansenBandwidthParser(bw_file=bandwidth_file, netscale=scale)

    # (data file, display name) pairs, read from the current directory
    dats = [
        ('vanilla_none.dat', 'Vanilla'),
        ('singlepass_merkle.dat', 'Sing(M)'),
        ('telescoping_merkle.dat', 'Tele(M)'),
        ('singlepass_threshsig.dat', 'Sing(T)'),
        ('telescoping_threshsig.dat', 'Tele(T)'),
    ]

    for dat_path, name in dats:
        with open(dat_path) as f:
            # One report is printed per data line of the file.
            for line in f:
                # Fourth space-separated field of the line
                total_bytes = float(line.split(" ")[3])

                net_size = bw_parser.get_relay_num()
                throughput = bw_parser.get_relay_throughput()

                relay_traffic = (net_size*total_bytes)/(epoch_count * 1000)
                prop = relay_traffic/throughput

                print("===================================")
                print(name)
                print("Throughput: " + np.format_float_scientific(throughput, precision=3))
                print("Total bytes: " + np.format_float_scientific(relay_traffic, precision=3))
                print("Proportion: " + np.format_float_scientific(prop, precision=3))

281
analysis/parselogs.py Executable file
View File

@ -0,0 +1,281 @@
#!/usr/bin/env python3
# Parse the log files output by the simulator to get the overall average
# and stddev of total bytes sent+received per epoch for each of:
# - dirauths
# - total relays
# - bootstrapping fallback relays per bw
# - non-bootstrapping fallback relays per bw
# - bootstrapping non-fallback relays per bw
# - non-bootstrapping non-fallback relays per bw
# - total clients
# - client circuit building times
# - bootstrapping clients
# - non-bootstrapping clients
# - steady-state dirauths (skipping the first epoch in which all clients
# are bootstrapping)
# - steady-state relays (as above)
# - steady-state clients (as above)
# - steady-state total relay traffic per client (as above)
# Pass the names of the log files as command-line arguments, or the log
# files themselves on stdin.
# The output will be five files named:
# - vanilla_none.dat
# - telescoping_merkle.dat
# - telescoping_threshsig.dat
# - singlepass_merkle.dat
# - singlepass_threshsig.dat
# Each file will contain one line for every network scale seen in the
# logs. The line will consist of 29 fields:
# - the network size in relays, i.e. the network scale multiplied by the
# full network size (float)
# - the mean and stddev (two floats) for each of the above 14 classes,
# in the above order; for the steady states, the stats are computed
# over the per-epoch averages, while for the others, the stats are
# computed on a per-entity basis
import math
import re
import os
import fileinput
from bwparser import BandwidthParser
# Mode names, each of the form "<style>_<authentication>"; they are the base
# names of the .dat files named in the header comment.
modes = [
    "vanilla_none",
    "telescoping_merkle",
    "telescoping_threshsig",
    "singlepass_merkle",
    "singlepass_threshsig",
]
class StatAccum:
    """Accumulate (mean, stddev, N) triples to compute an overall (mean,
    stddev, N).

    Internally only the running sum of the samples, the running sum of
    their squares and the total sample count are kept; each incoming
    triple is converted back into those three quantities.
    """

    def __init__(self):
        self.sumX = 0      # sum of all samples seen so far
        self.sumXsq = 0    # sum of the squares of all samples
        self.totN = 0      # number of samples

    def accum(self, mean, stddev, N):
        """Fold in a group of N samples described by its mean and sample
        stddev.

        N == 0 is ignored.  For N == 1 the stddev is not used (it is
        expected to be None).  For N > 1 the stddev must be a number.
        """
        if N == 0:
            # Nothing to change
            return
        if N == 1:
            # stddev will be None
            this_sumX = mean
            this_sumXsq = mean*mean
        else:
            this_sumX = mean * N
            variance = stddev * stddev
            # Invert the sample-variance formula to recover sum(x^2)
            this_sumXsq = variance * (N-1) + this_sumX * this_sumX / N
        self.sumX += this_sumX
        self.sumXsq += this_sumXsq
        self.totN += N

    def stats(self):
        """Return the overall (mean, stddev, N).

        With no samples the result is (None, None, 0); with exactly one
        sample the stddev is None.
        """
        if self.totN == 0:
            return (None, None, 0)
        if self.totN == 1:
            return (self.sumX, None, 1)
        mean = self.sumX / self.totN
        variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
            / (self.totN - 1)
        # The subtraction above can cancel to a tiny negative number when
        # all samples are (nearly) equal; clamp it so sqrt() cannot raise
        # "math domain error".
        if variance < 0:
            variance = 0.0
        stddev = math.sqrt(variance)
        return (mean, stddev, self.totN)

    def __str__(self):
        """Format as "<mean> <stddev>", printing missing values as 0."""
        mean, stddev, N = self.stats()
        if mean is None:
            mean = 0
        if stddev is None:
            stddev = 0
        return "%f %f" % (mean, stddev)
class StatBundle:
    """A bundle of 14 StatAccums, one per entity class.

    Slot order (also the order used by ``__str__``): dirauths, all relays,
    bootstrapping fallback relays, non-bootstrapping fallback relays,
    bootstrapping non-fallback relays, non-bootstrapping non-fallback
    relays, all clients, client circuit building, bootstrapping clients,
    non-bootstrapping clients, steady-state dirauths, steady-state relays,
    steady-state clients, steady-state relay traffic per client.
    """

    def __init__(self):
        self.stats = [StatAccum() for _ in range(14)]

    def _feed(self, slot, mean, stddev, N):
        # Forward one (mean, stddev, N) triple to the accumulator in `slot`.
        self.stats[slot].accum(mean, stddev, N)

    def dirauth(self, mean, stddev, N):
        self._feed(0, mean, stddev, N)

    def relay_all(self, mean, stddev, N):
        self._feed(1, mean, stddev, N)

    def relay_fb_boot(self, mean, stddev, N):
        self._feed(2, mean, stddev, N)

    def relay_fb_nboot(self, mean, stddev, N):
        self._feed(3, mean, stddev, N)

    def relay_nfb_boot(self, mean, stddev, N):
        self._feed(4, mean, stddev, N)

    def relay_nfb_nboot(self, mean, stddev, N):
        self._feed(5, mean, stddev, N)

    def client_all(self, mean, stddev, N):
        self._feed(6, mean, stddev, N)

    def client_circuit_build(self, mean, stddev, N):
        self._feed(7, mean, stddev, N)

    def client_boot(self, mean, stddev, N):
        self._feed(8, mean, stddev, N)

    def client_nboot(self, mean, stddev, N):
        self._feed(9, mean, stddev, N)

    def steady_dirauth(self, mean, stddev, N):
        self._feed(10, mean, stddev, N)

    def steady_relay(self, mean, stddev, N):
        self._feed(11, mean, stddev, N)

    def steady_client(self, mean, stddev, N):
        self._feed(12, mean, stddev, N)

    def steady_relay_perclient(self, mean, stddev, N):
        self._feed(13, mean, stddev, N)

    def __str__(self):
        # The 14 accumulators, space-separated, in slot order
        return ' '.join(str(acc) for acc in self.stats)
class LogParser:
    """A class to parse the logfiles output by sim.py.

    Feed the log one line at a time to parse_line(); call write_output()
    afterwards to write one "<mode>.dat" file per mode seen.
    """

    def __init__(self, network_size):
        # self.stats is a dict indexed by mode name (like
        # "singlepass_merkle") whose value is a dict indexed
        # by network scale whose value is a StatBundle
        self.stats = dict()
        # Multiplied with each scale to form the first output column
        self.network_size = network_size
        # StatBundle of the simulation currently being parsed
        self.curbundle = None
        # True until the first Relays(FB) line of a simulation is seen
        self.fbbootstrapping = None
        # True once a "MiB used" line has been seen for this simulation
        self.steadystate = False

        # Raw strings keep the regex escapes (\d, \., \/) out of Python's
        # own string-escape processing; the patterns are unchanged.
        self.startre = re.compile(r'Starting simulation .*?\/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
        self.statperbwre = re.compile(r'(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
        self.statre = re.compile(r'(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*circuit_building_time=([\d\.]+).*N=(\d+)')
        self.mibre = re.compile('MiB used')

    def parse_line(self, line):
        """Parse one log line and update the current StatBundle.

        A "Starting simulation" line selects (creating if needed) the
        bundle for its mode and scale.  A "MiB used" line switches to
        steady state.  Statistics lines are routed to the bundle method
        matching their entity type.  Any other line is ignored.

        Raises ValueError for a statistics line with an unknown entity
        type.
        """
        m = self.startre.search(line)
        if m:
            mode = m.group(1).lower() + "_" + m.group(2).lower()
            scale = m.group(3)
            if mode not in self.stats:
                self.stats[mode] = dict()
            if scale not in self.stats[mode]:
                self.stats[mode][scale] = StatBundle()
            self.curbundle = self.stats[mode][scale]
            self.fbbootstrapping = True
            self.steadystate = False
            return

        m = self.statperbwre.search(line)
        circuit_buildings = 0.0
        if m:
            # Per-bandwidth relay line: carries bytes and bytesperbw
            enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
                m.group(1, 2, 4, 5, 7, 8)
        else:
            m = self.statre.search(line)
            if m:
                enttype, means, stddevs, circuit_buildings, Ns = \
                    m.group(1, 3, 5, 6, 7)
                meanperbws, stddevperbws = None, None
            else:
                m = self.mibre.search(line)
                if m:
                    # We've reached steady state
                    self.steadystate = True
                # Not a statistics line: nothing more to do
                return

        # The optional "pm" groups are None when the log line omits them
        mean = float(means)
        stddev = float(stddevs) if stddevs else None
        meanperbw = float(meanperbws) if meanperbws else None
        stddevperbw = float(stddevperbws) if stddevperbws else None
        if circuit_buildings:
            circuit_buildings = float(circuit_buildings)
        N = int(Ns)

        if enttype == 'Dirauths':
            self.curbundle.dirauth(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_dirauth(mean, None, 1)
        elif enttype == 'Relays':
            self.curbundle.relay_all(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_relay(mean, None, 1)
            # Total relay bytes, kept for the per-client figure computed
            # when the following steady-state Clients line arrives
            self.totrelaybytes = mean * N
        elif enttype == 'Relays(FB)':
            # Only the first Relays(FB) line of a simulation counts as
            # bootstrapping
            if self.fbbootstrapping:
                self.curbundle.relay_fb_boot(meanperbw, stddevperbw, N)
                self.fbbootstrapping = False
            else:
                self.curbundle.relay_fb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(B)':
            self.curbundle.relay_nfb_boot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(NB)':
            self.curbundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Clients':
            self.curbundle.client_all(mean, stddev, N)
            # NOTE(review): the stddev passed here is the one parsed for
            # the byte count, not for the circuit building time -- confirm
            # this is intended.
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
            if self.steadystate:
                self.curbundle.steady_client(mean, None, 1)
                self.curbundle.steady_relay_perclient(self.totrelaybytes / N, None, 1)
        elif enttype == 'Clients(B)':
            self.curbundle.client_boot(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
        elif enttype == 'Clients(NB)':
            self.curbundle.client_nboot(mean, stddev, N)
            self.curbundle.client_circuit_build(circuit_buildings, stddev, N)
        else:
            raise ValueError('Unknown entity type "%s"' % enttype)

    def write_output(self):
        """Write "<mode>.dat" in the current directory for every mode seen.

        Each line holds network_size * scale followed by the bundle for
        that scale; lines are ordered by increasing numeric scale.
        """
        for mode, per_scale in self.stats.items():
            with open("%s.dat" % mode, "w") as datout:
                # Scales are stored as strings; sort them numerically so
                # that e.g. "10" does not come before "2".
                for scale in sorted(per_scale, key=float):
                    datout.write("%s %s\n" %
                                 (self.network_size*float(scale), per_scale[scale]))
if __name__ == '__main__':
    # sensible defaults
    bandwidth_file = os.getenv('BW_FILE')

    if os.getenv('BW_ALGO') == "komlo":
        # keep the original assumption
        network_size = 6500
    else:
        # Full network size as reported by the bandwidth file parser
        bw_parser = BandwidthParser(bw_file=bandwidth_file)
        network_size = bw_parser.get_relay_num()

    # Log lines come from the files named on the command line, or stdin
    logparser = LogParser(network_size)
    for logline in fileinput.input():
        logparser.parse_line(logline)
    logparser.write_output()

51
analysis/plotbandwidth.py Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
from bwparser import BandwidthParser
# Plot the bandwidth files using gnuplot
if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: outputdir\n")
        sys.exit(1)

    # set some defaults
    bandwidth_file = os.getenv('BW_FILE')
    output_dir = sys.argv[1]
    output_file = output_dir + "/bw_file.pdf"

    bw_parser = BandwidthParser(bw_file=bandwidth_file)

    # Create dat files and gpcode
    gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s'
set title 'Bandwidth distribution'
set key out
set xlabel "Bandwidth (kB/s)"
set ylabel "Number of relays"
set xtics 100000
plot """ % (output_file)

    # One plot element per bandwidth file
    curves = []
    for bw_file in bw_parser.get_files():
        local_bw_parser = BandwidthParser(bw_file=bw_file)
        bandwidths = local_bw_parser.get_distribution()

        # Each line of "<bw_file>.dat" is "<index> <bandwidth>"
        with open("%s.dat" % bw_file, "w") as datout:
            for i, bandwidth in enumerate(bandwidths):
                datout.write("%s %s\n" % (i, bandwidth))

        # Curve title: the first three dash-separated parts of the file
        # name (presumably its date prefix -- verify against the dataset
        # naming).
        date_string = "-".join(bw_file.split("-")[:3])
        curves.append("'%s.dat' using 2:1 with lines title '%s'" %
                      (bw_file, date_string))

    # Join the elements with commas instead of appending a comma after each
    # one, so the plot command never ends in a dangling separator.
    gpcode += ",".join(curves)

    gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
    gp.communicate(gpcode.encode('ascii'))

116
analysis/plotdats.py Executable file
View File

@ -0,0 +1,116 @@
#!/usr/bin/env python3
import os
import math
import subprocess
import analytical
from bwparser import JansenBandwidthParser
# Plot the dat files generated by parselogs.py using gnuplot
if __name__ == '__main__':
    # sensible defaults
    bandwidth_file = os.getenv('BW_FILE')
    bw_parser = None
    if os.getenv('BW_ALGO') == "komlo":
        # keep the original assumption
        network_size = 6500
    else:
        bw_parser = JansenBandwidthParser(bw_file=bandwidth_file)
        network_size = bw_parser.get_relay_num()

    # The analytical functions come from analytical.py
    # Replace 'R' in the output of that program by 'x' and replace
    # 'logR' by 'ceil(log(x)/log(2))'.
    relay_perclient_analyticals = analytical.calculate_relay_analyticals()

    # Per-relay curves: the per-client expression scaled by
    # 2500000/network_size (presumably 2500000 is the assumed number of
    # clients -- TODO confirm).
    relay_analyticals = dict()
    for key, expr in relay_perclient_analyticals.items():
        relay_analyticals[key] = '(2500000/%s)*(%s)' % (network_size, expr)

    client_analyticals = analytical.calculate_client_analyticals()

    # (output base name, plot title, column of the mean in the .dat files,
    #  draw error bars?, analytical curves keyed by .dat base name or None)
    plots = [
        ('dirauth', 'Directory authorities total bytes each', 2, False, None),
        ('relay', 'Relay total bytes each', 4, False, None),
        ('relay_bf', 'Bootstrapping fallback relays bytes per bw', 6, True, None),
        ('relay_f', 'Non-bootstrapping fallback relays bytes per bw', 8, True, None),
        ('relay_b', 'Bootstrapping normal relays bytes per bw', 10, True, None),
        ('relay_n', 'Non-bootstrapping normal relays bytes per bw', 12, True, None),
        ('client', 'Client total bytes each', 14, False, None),
        ('client_c', 'Client circuit building times', 16, True, None),
        ('client_b', 'Bootstrapping client total bytes', 18, True, None),
        ('client_n', 'Non-bootstrapping client total bytes', 20, True, None),
        ('dirauth_ss', 'Directory authority total bytes each', 22, True, None),
        ('relay_ss', 'Relay total bytes each', 24, True, relay_analyticals),
        ('client_ss', 'Client total bytes each', 26, True, client_analyticals),
        ('relay_perclient_ss', 'Relay total bytes per client', 28, True, relay_perclient_analyticals),
        ('relay_ss_wide', 'Relay total bytes each', 24, True, relay_analyticals),
        ('client_ss_wide', 'Client total bytes each', 26, True, client_analyticals),
        ('relay_perclient_ss_wide', 'Relay total bytes per client', 28, True, relay_perclient_analyticals),
    ]

    # (.dat base name, legend entry, gnuplot line style)
    dats = [
        ('vanilla_none', 'Vanilla', 1),
        ('singlepass_merkle', 'Sing(M)', 2),
        ('telescoping_merkle', 'Tele(M)', 3),
        ('singlepass_threshsig', 'Sing(T)', 4),
        ('telescoping_threshsig', 'Tele(T)', 5),
    ]

    # gnuplot preamble shared by every plot; filled with (output base name,
    # title, range commands, network_size, network_size).
    preamble = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s.pdf'
set title '%s'
%s
set key out
set rmargin 17
set arrow from %s, graph 0 to %s, graph 1 nohead lc 0 lw 2
set xlabel "Number of relays"
set style line 1 lw 2 lc 1 pt 1
set style line 2 lw 2 lc 2 pt 1
set style line 3 lw 2 lc 3 pt 1
set style line 4 lw 2 lc 4 pt 1
set style line 5 lw 2 lc 5 pt 1
set style line 10 lw 2 lc 0 dt 2
set style line 11 lw 2 lc 1 dt 2
set style line 12 lw 2 lc 2 dt 2
set style line 13 lw 2 lc 3 dt 2
set style line 14 lw 2 lc 4 dt 2
set style line 15 lw 2 lc 5 dt 2
plot """

    for filename, title, col, errbars, analyticals in plots:
        if analyticals is None:
            analyticals = dict()

        # The "_wide" variants use log-log axes out to 100x the network size
        if filename == 'relay_ss_wide':
            ranges = 'set xrange [300:%s]\nset logscale xy\nset yrange [10000000:]' % (network_size*100)
        elif filename.endswith('_wide'):
            ranges = "set xrange [300:%s]\nset logscale xy\nset yrange [10000:]" % (network_size*100)
        else:
            ranges = "set xrange [0:2200]\nset yrange [0:]"

        # Collect the comma-separated elements of the plot command
        clauses = []
        for datname, legend, style in dats:
            clauses.append("'%s.dat' using 1:%d with lines ls %d title '%s'" %
                           (datname, col, style, legend))
            if errbars:
                # The stddev sits in the column right after the mean
                clauses.append("'%s.dat' using 1:%d:%d with errorbars ls %d notitle" %
                               (datname, col, col+1, style))
            if datname in analyticals:
                # Analytical curve in the matching dashed style (10 + style)
                clauses.append("%s ls %d notitle" %
                               (analyticals[datname], style+10))
        if analyticals:
            # Off-range constant drawn only to add an 'Analytical' key entry
            clauses.append("-100 ls 10 title 'Analytical'")

        gpcode = preamble % (filename, title, ranges, network_size, network_size)
        gpcode += ", ".join(clauses)

        gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
        gp.communicate(gpcode.encode('ascii'))

51
analysis/plotdist.py Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
import os
import sys
import subprocess
from bwparser import BandwidthParser, JansenBandwidthParser
# Plot the average bandwidth of the Jansen-generated distribution at each
# network scale using gnuplot
if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: outputdir\n")
        sys.exit(1)

    # set some defaults
    bandwidth_file = os.getenv('BW_FILE')
    output_dir = sys.argv[1]
    output_file = output_dir + "/bw_dist.pdf"

    # calculate invariants
    bw_parser = BandwidthParser(bw_file=bandwidth_file)
    true_avg = bw_parser.get_average()  # computed but not used below
    jansen_parser = JansenBandwidthParser(bw_file=bandwidth_file)

    # Create dat file: one "<scale> <average bandwidth>" line for each
    # scale 0.01, 0.02, ..., 1.0
    dat_name = "%s-jansen.dat" % bandwidth_file
    with open(dat_name, "w") as datout:
        for percent in range(1, 101):
            netscale = percent/100
            jansen_parser.set_scale(netscale)
            datout.write("%s %s\n" %
                         (netscale, jansen_parser.get_average()))

    # Build plot
    # NOTE(review): the 'set arrow' line gives no y coordinate for its end
    # point -- confirm gnuplot accepts it as written.
    gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s'
set title 'Generated Distribution for %s'
set key out
set arrow from 1, graph 0 to 1
set xlabel "Scale"
set ylabel "Average Bandwidth"
set xtics 0.1
plot """ % (output_file, bandwidth_file)
    gpcode += "'%s' using 1:2 with lines title 'Bandwidth'" % (dat_name)

    gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE)
    gp.communicate(gpcode.encode('ascii'))

132
analysis/plotonionperf.py Executable file
View File

@ -0,0 +1,132 @@
#!/usr/bin/env python3
import os
import sys
import csv
import fileinput
import subprocess
def parse_onionperf_file(file_name):
    """Average the 'md' column per source over rows whose position is 3.

    The CSV is read from the "datasets" directory next to this script;
    lines starting with '#' are skipped.  Rows with an empty 'source' are
    ignored.

    Returns a dict mapping each source that has at least one position-3
    row to the mean of the 'md' values of those rows, keyed in order of
    each source's first appearance in the file.
    """
    # os.path.join copes with an empty dirname (script started from its own
    # directory), where plain concatenation would yield "/datasets/...".
    path = os.path.join(os.path.dirname(__file__), "datasets", file_name)

    # source -> [number of position-3 rows, sum of their 'md' values];
    # filled in a single pass instead of re-reading the file per source.
    tallies = {}
    with open(path, 'r') as f:
        # open the file rows as dicts, skip the comments
        reader = csv.DictReader(filter(lambda row: row[0] != '#', f), delimiter=',')
        for row in reader:
            source = row['source']
            if source == '':
                continue
            tally = tallies.setdefault(source, [0, 0])
            # we have completed a circuit after reaching the exit relay
            if int(row['position']) == 3:
                tally[0] += 1
                tally[1] += float(row['md'])

    # calculate average, leaving out sources without any position-3 row
    res = {}
    for source, (count, total) in tallies.items():
        if count != 0:
            res[source] = total/count
    return res
def fetch_sources(file_name):
    """Return the distinct non-empty 'source' values of a dataset CSV.

    The CSV is read from the "datasets" directory next to this script;
    lines starting with '#' are skipped.  Sources are returned as a list in
    order of first appearance.
    """
    # os.path.join copes with an empty dirname (script started from its own
    # directory), where plain concatenation would yield "/datasets/...".
    path = os.path.join(os.path.dirname(__file__), "datasets", file_name)

    sources = []
    with open(path, 'r') as f:
        # open the file rows as dicts, skip the comments
        reader = csv.DictReader(filter(lambda row: row[0] != '#', f), delimiter=',')

        # fetch sources
        for row in reader:
            source = row['source']
            if source != '' and source not in sources:
                sources.append(source)
    return sources
# Plot the relative circuit building times using gnuplot
if __name__ == '__main__':
# set some defaults
file_name = os.getenv('CB_FILE')
# fetch average times from onion performance file
avg_times = parse_onionperf_file(file_name)
#print(avg_times)
# fetch simulator times
sim_times = {}
dats = []
dats += [each for each in os.listdir('.') if each.endswith('.dat') and 'client_c_relative' not in each]
for dat in dats:
with open(dat, 'r') as f:
for line in f.readlines():
sim_times.update({dat: float(line.split(" ")[15])})
#print(sim_times)
# vanilla is base
base = sim_times['vanilla_none.dat']
# calculate ratios
ratios = {
'tele-thresh': sim_times['telescoping_threshsig.dat']/base,
'tele-merkle': sim_times['telescoping_merkle.dat']/base,
'singlep-thresh': sim_times['singlepass_threshsig.dat']/base,
'singlep-merkle': sim_times['singlepass_merkle.dat']/base
}
#print(ratios)
# calculate relative times
relative_times = {}
for avg in avg_times.keys():
for ratio in ratios.keys():
relative_times.update({
avg + "-vanilla": avg_times[avg]
})
relative_times.update({
avg + "-" + ratio: avg_times[avg] * ratios[ratio]
})
#print(relative_times)
# Create dat file
for source in fetch_sources(file_name):
with open("client_c_relative-%s.dat" % source, "w") as datout:
for k in relative_times.keys():
if source in k:
datout.write("%s " % \
(relative_times[k]))
datout.close()
# Build plot
for source in fetch_sources(file_name):
output_file = "client_c_relative-%s.pdf" % source
gpcode = """set terminal pdf font "DejaVuSans,14" size 5,2.25
set output '%s'
set title 'Relative circuit building times'
set arrow from %s
set grid y
set key out
unset xtics
set ylabel 'Relative times (ms)'
plot """ % (output_file, max(relative_times.values()))
col = 1
firstplot = True
for k in relative_times.keys():
if source in k and source:
if firstplot is False:
gpcode += ", "
gpcode += "'client_c_relative-%s.dat' using 0:%d with points title '%s'" % (source, col, k)
col += 1