---
title: "Cryptography"
author: "Tim Chumley"
date: ""
output:
pdf_document: default
html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
```
### Setup
The first line below loads the code that does the encrypting and ecrypting of messages, as well as the implementation of the Metropolis-Hastings algorithm. For now, I've left this code obscured so that we can skip over technical details. The second line loads our matrix $M$ of letter frequencies from War and Peace.
```{r}
source("http://tchumley.mtholyoke.edu/m339sp/project/cryptography-functions.R")
M <- read.table("http://tchumley.mtholyoke.edu/m339sp/project/war-and-peace-matrix.txt")
```
### Demo
We start by choosing an arbitrary (random) encryption key and encrypting a message consisting of text from an Edgar Allen Poe story.
```{r}
set.seed(339)
encryption.key <- sample(1:26)
message <- "coincidences in general are great stumbling blocks in the way of that class of thinkers who have been educated to know nothing of the theory of probabilities that theory to which the most glorious objects of human research are indebted for the most glorious of illustrations edgar allen poe the murders in the rue morgue"
encrypted.message <- decrypt(message, encryption.key)
encrypted.message
```
Now we run the Metropolis-Hastings algorithm in an attempt to decrypt the message.
```{r}
decryptionRoutine(1e5, encrypted.message, M)
```
### Appendix
Here is the code implementing the algorithm, taken from our textbook.
```{r}
### Dobrow code ###
score <- function(code, mat)
{
logmat = log(mat + 1)
p <- 0
# For each pair of letters in the decoded message
# query the transition matrix for the probability of that pair
for (i in 1:(nchar(code)-1)){
p <- p + logmat[charIndex(substr(code, i, i)),charIndex(substr(code, i+1, i+1))]
}
# return the sum of these probabilities
p
}
ascii <- function(char)
{
strtoi(charToRaw(char),16L) #get 'raw' ascii value
}
# charIndex takes in a character and returns its 'char value'
# defined as a=1, b=2, ..., z=26, space=27
# this matches the array created by read.table
charIndex <- function(char)
{
aValue <- ascii(char)
if (aValue == 32)
{
# return 27 if a space
27
} else
{
#ascii sets "a" as 97, so subtract 96
aValue - 96
}
}
# Decrypts code according to curFunc
decrypt <- function(code,curFunc)
{
out <- code
# for each character in the message, decode it according to the curFunc
for (i in 1:nchar(code))
{
charInd <- charIndex(substr(code,i,i))
if (charInd < 27)
{
# change the ith character to the character determined by the curFunc
substr(out,i,i) <- rawToChar(as.raw(curFunc[charInd] + 96))
}
}
out
}
decryptionRoutine <- function(N, codemess, mat) {
# instantiate a map to hold previously computed codes' scores
map <- new.env(hash=T, parent=emptyenv())
# we begin with a basic (a->a, z->z) function for decrypting the codemess
curFunc <- 1:27
# calculate the score for curFunc and store it in the map
oldScore <- score(decrypt(codemess,curFunc),mat)
map[[paste(curFunc, collapse='')]] <- oldScore
for (iteration in 1:N) {
# sample two letters to swap
swaps <- sample(1:26,2)
oldFunc <- curFunc
# let curFunc be oldFunc but with two letters swapped
curFunc[swaps[1]] <- oldFunc[swaps[2]]
curFunc[swaps[2]] <- oldFunc[swaps[1]]
# if we have already scored this decoding,
# retrieve score from our map
if (exists(paste(curFunc, collapse =''), map)){
newScore <- map[[paste(curFunc, collapse ='')]]
} else
# if we have not already scored this decoding,
# calculate it and store it in the map
{
newScore <- score (decrypt(codemess,curFunc),mat)
map[[paste(curFunc, collapse = '')]] <- newScore
}
# decide whether to accept curFunc or to revert to oldFunc
if (runif(1) > exp(newScore-oldScore))
{
curFunc <- oldFunc
} else
{
oldScore <- newScore
}
# print out our decryption every 100 iterations
if ((iteration %% 100) == 0)
{
print(c(iteration,decrypt(codemess,curFunc)))
}
}
}
```