Initial commit

This commit is contained in:
Florent Guiotte 2017-11-08 08:37:41 +00:00
commit ef25ab39cc

112
markov.py Normal file
View File

@ -0,0 +1,112 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file markov.py
# \brief Word-level Markov chain text generator (two-word prefix -> next word)
# \author Florent Guiotte <florent.guiotte@gmail.com>
# \version 0.2
# \date 04 sept. 2017
#
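# With --create-dic, reads the log file given by -f (default markov.log),
# counts which word follows each pair of consecutive words, converts the
# counts to cumulative probabilities and pickles the result to the file
# given by -o (default out.pickle).
# Without --create-dic, loads the pickle given by -l (default out.pickle)
# and prints -n (default 1) randomly generated sentences.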
import sys
import random
import pickle
import getopt
def main():
    """Build a Markov model from a log file, or print generated sentences.

    Command-line options (read from ``sys.argv``):
        -h, --help    print usage and exit with status 0
        --create-dic  build the model from the input log and pickle it
        -f FILE       input log used with --create-dic (default markov.log)
        -o FILE       pickle written by --create-dic (default out.pickle)
        -l FILE       pickle loaded for generation (default out.pickle)
        -n COUNT      number of sentences to print (default 1)

    Exits with status 2 on invalid options or when the pickle cannot be
    loaded.
    """
    usage = (
        "usage: markov.py [-h] [--create-dic] [-f LOG] [-o PICKLE] "
        "[-l PICKLE] [-n COUNT]"
    )
    # parse command line options
    try:
        opts, _args = getopt.getopt(
            sys.argv[1:], "hf:n:o:l:", ["help", "create-dic"]
        )
    except getopt.GetoptError as err:
        # Report bad options cleanly instead of dumping a traceback.
        print(err, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)

    create = False
    loadpk = "out.pickle"
    output = "out.pickle"
    infile = "markov.log"
    nb = 1
    for o, a in opts:
        if o == '--create-dic':
            create = True
        elif o in ('--help', '-h'):
            # The module has no docstring, so fall back to the usage line
            # rather than printing "None".
            print(__doc__ or usage)
            sys.exit(0)
        elif o == "-o":
            output = a
        elif o == "-f":
            infile = a
        elif o == "-n":
            try:
                nb = int(a)
            except ValueError:
                print("-n expects an integer, got %r" % a, file=sys.stderr)
                sys.exit(2)
        elif o == "-l":
            loadpk = a

    if create:
        megadic = {}
        buildDic(megadic, infile)
        megacslist = megaComputeProbCSList(megadic)
        # Context manager guarantees the pickle is flushed and closed.
        with open(output, 'wb') as out:
            pickle.dump(megacslist, out)
    else:
        # SECURITY: unpickling executes arbitrary code embedded in the file;
        # only load pickles produced by this tool from a trusted location.
        try:
            with open(loadpk, 'rb') as pk:
                megacslist = pickle.load(pk)
        except (OSError, EOFError, pickle.UnpicklingError,
                AttributeError, ImportError, IndexError):
            print("No pickle file provided")
            sys.exit(2)
        # NOTE(review): the original indentation was lost; sentence generation
        # is assumed to belong to the load branch only -- confirm.
        for _ in range(nb):
            print(megaRandomPic(megacslist))
def buildDic(dic, logfile):
    """Accumulate follower counts from every line of *logfile* into *dic*.

    Each line is split on whitespace. For every word, the pair of the two
    preceding words (padded with "__firstword__" at the start of the line)
    is used as the key under which the word is counted via ``addInDic``.
    A final "__eol__" marker is counted after the last word of each line.

    dic     -- dictionary updated in place
    logfile -- path of the text file to read
    """
    # "with" closes the file even if an error occurs while reading.
    with open(logfile, "r") as log:
        for line in log:
            pre = ("__firstword__", "__firstword__")
            for word in line.split():
                addInDic(dic, word, pre)
                # Slide the two-word window forward.
                pre = (pre[1], word)
            addInDic(dic, "__eol__", pre)
def addInDic(dic, w, pre):
    """Increment the count of word *w* observed after prefix *pre*.

    dic -- mapping prefix -> {word: count}, updated in place
    w   -- the word that followed the prefix
    pre -- the prefix key (any hashable value)
    """
    followers = dic.setdefault(pre, {})
    followers[w] = followers.get(w, 0) + 1
def megaComputeProbCSList(megadic):
    """Return a dict mapping each key of *megadic* to its cumulative list.

    Every value of *megadic* is passed through ``computeProbCSList``; the
    keys are kept unchanged.
    """
    return {
        prefix: computeProbCSList(counts)
        for prefix, counts in megadic.items()
    }
def computeProbCSList(dic):
    """Turn a {word: count} dict into a cumulative-probability list.

    Returns a list of (word, bound) tuples ordered by ascending count, where
    bound is the fraction of the total count covered by that word and all
    words before it. An empty dict yields an empty list.
    """
    tsum = sum(dic.values())
    ranked = sorted(dic.items(), key=lambda item: item[1])
    cumulative = []
    running = 0
    for word, count in ranked:
        # Accumulate the raw counts and divide once per entry: summing the
        # individual fractions drifts, and could leave the last bound just
        # under 1.0 so that a random draw matched no entry at all.
        running += count
        cumulative.append((word, running / tsum))
    return cumulative
def megaRandomPic(megacsdic):
    """Generate one sentence by walking the model from the start state.

    Starting from the ("__firstword__", "__firstword__") key, a word is
    drawn with ``randomPick`` from the list stored under the current key,
    and the key becomes (previous second element, drawn word). The walk
    stops when "__eol__" is drawn. Each emitted word is followed by a
    single space, so a non-empty result ends with a space.
    """
    state = ("__firstword__", "__firstword__")
    pieces = []
    word = randomPick(megacsdic[state])
    while word != "__eol__":
        pieces.append(word + " ")
        state = (state[1], word)
        word = randomPick(megacsdic[state])
    return "".join(pieces)
def randomPick(CSlist):
    """Draw one item from a cumulative-probability list.

    CSlist -- sequence of (item, bound) pairs with non-decreasing bounds

    Returns the first item whose bound exceeds a uniform draw in [0, 1).
    If no bound exceeds the draw (the last bound fell short of 1.0 because
    of rounding), the last item is returned. Returns None only when CSlist
    is empty.
    """
    r = random.random()
    for item, bound in CSlist:
        if r < bound:
            return item
    # Fall back to the last entry instead of None so callers never receive
    # None from a non-empty list.
    return CSlist[-1][0] if CSlist else None
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()