For no good reason, I implemented a Markov chain generator in Python. I used Python's iterators throughout the implementation. Each of the functions called from the main program produces an iterator, consumes an iterator or both.
"""
The script markov.py reads text from standard input and writes
a paragraph of text to standard output. Blank lines in the
input are treated as paragraph separators and are represented
as '\n' in the code.
"""
import random
# Chain state at a paragraph boundary: a pair of paragraph-separator
# markers. It is the starting key for both building and walking the chain.
nlnl = ('\n', '\n')
def new_key(key, word):
    r"""Return the chain state that follows ``key`` after emitting ``word``.

    A state is a pair made of the two most recent words.

    key  -- the current state; only its second element is carried over.
    word -- the word just seen or produced. The paragraph separator
            '\n' resets the state to ``nlnl``.
    """
    if word == '\n':
        # A paragraph boundary starts the next paragraph from a clean state.
        return nlnl
    return (key[1], word)
def markov_data_from_words(words):
    r"""Build the transition table of the chain from a stream of words.

    words -- an iterable of words, with '\n' marking paragraph breaks.

    Returns a dict mapping each state (a pair of consecutive words, as
    produced by ``new_key``) to the list of words observed right after
    that state. Repeated followers are kept, so a uniform pick from the
    list is weighted by how often each follower occurred.
    """
    data = {}
    key = nlnl  # every text starts at a paragraph boundary
    for word in words:
        data.setdefault(key, []).append(word)
        key = new_key(key, word)
    return data
def words_from_markov_data(data):
    r"""Generate an endless stream of words by randomly walking the chain.

    data -- a transition table as built by ``markov_data_from_words``.

    Yields words (and '\n' paragraph separators) forever; the consumer
    decides when to stop pulling from the generator.
    """
    key = nlnl  # begin at a paragraph boundary
    while True:
        # A state with no recorded followers falls back to picking from
        # ``nlnl``, which always yields '\n' and so resets the state.
        word = random.choice(data.get(key, nlnl))
        key = new_key(key, word)
        yield word
def words_from_file(f):
    r"""Yield the whitespace-separated words of ``f`` one at a time.

    f -- any iterable of text lines (for example an open file).

    A line with no words (empty or whitespace only) yields a single '\n'
    as a paragraph separator. One final '\n' is always yielded after the
    last line, so the stream ends on a paragraph boundary.
    """
    for line in f:
        words = line.split()
        if words:
            for word in words:
                yield word
        else:
            yield '\n'
    yield '\n'
def paragraph_from_words(words):
    r"""Join words from ``words`` into one paragraph string.

    words -- an iterable of words; it may be endless.

    Consumes words up to and including the first '\n' separator (or
    until the iterable is exhausted) and returns the words before it
    joined by single spaces. Returns '' if the first item is '\n' or
    the iterable is empty.
    """
    result = []
    for word in words:
        if word == '\n':
            break
        result.append(word)
    return ' '.join(result)
if __name__ == '__main__':
    import sys

    # Pipeline, innermost first: read words from standard input, build the
    # transition table, walk it at random, and print the first paragraph.
    # A single-argument print(...) call behaves the same on Python 2 and 3.
    print(paragraph_from_words(
        words_from_markov_data(
            markov_data_from_words(
                words_from_file(
                    sys.stdin)))))
Update 1/2005: JayCee ported the script to Ruby.
0 comments:
Post a Comment