November 10, 2003

Markov Chain Generator

For no good reason, I implemented a Markov chain generator in Python. I used Python's iterators throughout the implementation. Each of the functions called from the main program produces an iterator, consumes an iterator or both.

"""
The script markov.py reads text from standard input and writes
a paragraph of text to standard output.  Blank lines in the
input are treated as paragraph separators and are represented
as '\n' in the code.
"""
import random

# Start-of-paragraph key: a pair of paragraph-separator markers.
nlnl = ('\n', '\n')

def new_key(key, word):
  """Return the two-word key that follows `key` once `word` is seen.

  An ordinary word shifts the window: the second element of `key`
  becomes the first and `word` becomes the second.  A paragraph
  separator ('\\n') resets the key to the start-of-paragraph key `nlnl`.
  """
  if word != '\n':
      return (key[1], word)
  return nlnl

def markov_data_from_words(words):
  """Build the Markov table from an iterable of words.

  Returns a dict that maps each two-word key to the list of words that
  were seen immediately after it.  Repeated followers are appended again
  rather than de-duplicated.  The walk starts from the
  start-of-paragraph key `nlnl`.
  """
  table = {}
  state = nlnl
  for token in words:
      followers = table.setdefault(state, [])
      followers.append(token)
      state = new_key(state, token)
  return table

def words_from_markov_data(data):
  """Yield an endless stream of words by a random walk over `data`.

  `data` maps two-word keys to lists of follower words.  The walk starts
  at the start-of-paragraph key `nlnl`; the generator never terminates
  on its own, so the consumer decides when to stop.
  """
  state = nlnl
  while True:
      # A key missing from the table falls back to choosing from `nlnl`
      # itself, whose only possible pick is '\n'.
      candidates = data.get(state, nlnl)
      token = random.choice(candidates)
      state = new_key(state, token)
      yield token
def words_from_file(f):
  """Yield the whitespace-separated words of each line in `f`.

  `f` is any iterable of text lines.  A line with no words (blank or
  whitespace only) is reported as a single '\\n' marker, and one final
  '\\n' is always yielded after the last line.
  """
  for line in f:
      tokens = line.split()
      if not tokens:
          # Blank line: emit the paragraph-separator marker instead.
          yield '\n'
          continue
      for token in tokens:
          yield token
  # Terminate the last paragraph even if the input lacks a blank line.
  yield '\n'

def paragraph_from_words(words):
  """Join words from `words` with single spaces up to the first '\\n'.

  Consumption of `words` stops at the first '\\n' marker (or when the
  iterable is exhausted), so an endless generator is safe to pass in as
  long as it eventually produces '\\n'.
  """
  pieces = []
  for token in words:
      if token != '\n':
          pieces.append(token)
          continue
      break
  return ' '.join(pieces)

if __name__ == '__main__':
  import sys
  # Pipeline, innermost call first: split stdin into words, build the
  # Markov table, random-walk it, and join one paragraph of output.
  # The parenthesised single-argument form of print behaves the same as
  # the Python 2 print statement and is also valid on Python 3.
  print(paragraph_from_words(
          words_from_markov_data(
              markov_data_from_words(
                  words_from_file(
                      sys.stdin)))))

Update 1/2005: JayCee ported the script to Ruby.

0 comments: