112 lines
2.8 KiB
Python
112 lines
2.8 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
# \file markov.py
|
|
# \brief Generate random sentences with a second-order, word-level Markov chain learned from a text log.
|
|
# \author Florent Guiotte <florent.guiotte@gmail.com>
|
|
# \version 0.2
|
|
# \date 04 sept. 2017
|
|
#
|
|
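# With --create-dic the model is built from a text log (-f) and pickled (-o); otherwise a pickled model is loaded (-l). -n sets how many sentences are printed.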
|
|
|
|
import sys
|
|
import random
|
|
import pickle
|
|
import getopt
|
|
|
|
def main():
    """Command-line entry point: build a model and/or print random sentences.

    Options are read from ``sys.argv``:

    * ``--create-dic``: build the model from the input log and pickle it.
    * ``-f FILE``: input log used with ``--create-dic`` (default ``markov.log``).
    * ``-o FILE``: pickle written by ``--create-dic`` (default ``out.pickle``).
    * ``-l FILE``: pickle loaded when ``--create-dic`` is absent
      (default ``out.pickle``).
    * ``-n COUNT``: number of sentences to print (default 1).
    * ``-h`` / ``--help``: print the usage text and exit with status 0.

    Exits with status 2 on a command-line error or when the pickled model
    cannot be loaded.
    """
    # The module has no docstring, so printing __doc__ would only show "None";
    # keep an explicit usage text instead.
    usage = (
        "Usage: markov.py [--create-dic] [-f LOG] [-o PICKLE] [-l PICKLE] [-n COUNT]\n"
        "  --create-dic  build the model from LOG and write it to PICKLE (-o)\n"
        "  -f LOG        input log file (default: markov.log)\n"
        "  -o PICKLE     output model file (default: out.pickle)\n"
        "  -l PICKLE     model file to load (default: out.pickle)\n"
        "  -n COUNT      number of sentences to print (default: 1)\n"
        "  -h, --help    show this message and exit"
    )

    # parse command line options
    try:
        opts, _args = getopt.getopt(
            sys.argv[1:], "hf:n:o:l:", ["help", "create-dic"]
        )
    except getopt.GetoptError as err:
        # Unknown option or missing argument: report it instead of a traceback.
        print(err, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)

    create = False
    loadpk = "out.pickle"
    output = "out.pickle"
    infile = "markov.log"
    nb = 1
    for o, a in opts:
        if o == "--create-dic":
            create = True
        elif o in ("--help", "-h"):
            print(usage)
            sys.exit(0)
        elif o == "-o":
            output = a
        elif o == "-f":
            infile = a
        elif o == "-n":
            try:
                nb = int(a)
            except ValueError:
                print("-n expects an integer, got %r" % (a,), file=sys.stderr)
                sys.exit(2)
        elif o == "-l":
            loadpk = a

    if create:
        megadic = {}
        buildDic(megadic, infile)
        megacslist = megaComputeProbCSList(megadic)
        # Context manager guarantees the pickle is flushed and closed.
        with open(output, "wb") as out:
            pickle.dump(megacslist, out)
    else:
        # NOTE(security): unpickling can execute arbitrary code; only load
        # model files that come from a trusted source.
        try:
            with open(loadpk, "rb") as src:
                megacslist = pickle.load(src)
        except Exception as err:
            # Top-level boundary: a missing or corrupt file can surface as
            # several exception types, but Ctrl-C must still propagate.
            print("No pickle file provided")
            print("%s: %s" % (loadpk, err), file=sys.stderr)
            sys.exit(2)

    for _ in range(nb):
        print(megaRandomPic(megacslist))
|
|
|
|
def buildDic(dic, logfile):
    """Count word transitions from a text file into ``dic`` (modified in place).

    Each line is split on whitespace.  Every word is recorded, via
    ``addInDic``, as a follower of the pair of words that precede it; a line
    starts from the pair ``("__firstword__", "__firstword__")`` and is closed
    by recording ``"__eol__"`` as the follower of its last pair.  A line with
    no words therefore records ``"__eol__"`` directly after the start pair.

    dic     -- mapping {(word, word): {follower: count}} to update
    logfile -- path of the text file to read
    """
    start = ("__firstword__", "__firstword__")
    # The context manager closes the file even if a line fails to decode.
    with open(logfile, "r") as log:
        for line in log:
            pre = start
            for word in line.split():
                addInDic(dic, word, pre)
                # Slide the two-word window forward by one word.
                pre = (pre[1], word)
            addInDic(dic, "__eol__", pre)
|
|
|
|
def addInDic(dic, w, pre):
    """Record one more occurrence of ``w`` following the prefix ``pre``.

    dic -- mapping {prefix: {word: count}}, updated in place
    w   -- the word that was seen
    pre -- the prefix (key of ``dic``) that the word followed
    """
    # Create the per-prefix counter on first sight, then bump the count.
    followers = dic.setdefault(pre, {})
    followers[w] = followers.get(w, 0) + 1
|
|
|
|
def megaComputeProbCSList(megadic):
    """Convert every per-prefix count table into a cumulative list.

    megadic -- mapping {prefix: {word: count}}

    Returns a new dict with the same keys, where each value is the result of
    ``computeProbCSList`` applied to that prefix's count table.
    """
    return {
        prefix: computeProbCSList(counts)
        for prefix, counts in megadic.items()
    }
|
|
|
|
def computeProbCSList(dic):
    """Turn a ``{word: count}`` table into a cumulative-probability list.

    dic -- mapping from word to its occurrence count

    Returns a list of ``(word, cumulative_probability)`` tuples ordered by
    ascending count.  The value attached to the last entry is exactly 1.0.
    An empty table gives an empty list.
    """
    total = sum(dic.values())
    cumulative = []
    running = 0
    for word, count in sorted(dic.items(), key=lambda item: item[1]):
        running += count
        # Divide the running count instead of summing per-word fractions:
        # summed floats drift (ten times 0.1 gives 0.9999999999999999), which
        # would leave a gap below 1.0 that a random draw could fall into.
        cumulative.append((word, running / total))
    return cumulative
|
|
|
|
def megaRandomPic(megacsdic):
    """Generate one random sentence from the model.

    megacsdic -- mapping {(word, word): cumulative list} as consumed by
                 ``randomPick``

    Starting from the pair ``("__firstword__", "__firstword__")``, a follower
    is drawn for the current pair until ``"__eol__"`` is drawn.  Returns the
    drawn words, each followed by a single space.
    """
    state = ("__firstword__", "__firstword__")
    pieces = []
    while True:
        word = randomPick(megacsdic[state])
        if word == "__eol__":
            break
        pieces.append(word + " ")
        # Slide the two-word window forward by one word.
        state = (state[1], word)
    return "".join(pieces)
|
|
|
|
def randomPick(CSlist):
    """Draw one word from a cumulative-probability list.

    CSlist -- list of ``(word, cumulative_probability)`` tuples with
              non-decreasing probabilities

    Returns the first word whose cumulative probability exceeds a uniform
    draw from ``random.random()``.  If no threshold exceeds the draw (the
    last cumulative value fell short of 1.0 through rounding), the last word
    is returned.  Returns None only for an empty list.
    """
    r = random.random()
    for word, threshold in CSlist:
        if r < threshold:
            return word
    # Fall back to the final entry so callers never receive None for a
    # non-empty list.
    return CSlist[-1][0] if CSlist else None
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()