Initial commit
This commit is contained in:
commit
ef25ab39cc
112
markov.py
Normal file
112
markov.py
Normal file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# \file markov.py
|
||||
# \brief Random sentence generator based on a word-level Markov chain.
|
||||
# \author Florent Guiotte <florent.guiotte@gmail.com>
|
||||
# \version 0.2
|
||||
# \date 04 sept. 2017
|
||||
#
|
||||
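# With --create-dic, reads a text log line by line, counts which word
# follows each pair of consecutive words, converts the counts into
# cumulative probabilities and pickles the resulting table. Sentences are
# then generated by walking that table from the start-of-line marker until
# the end-of-line marker is drawn.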
|
||||
|
||||
import sys
|
||||
import random
|
||||
import pickle
|
||||
import getopt
|
||||
|
||||
def main():
    """Command-line entry point: build a Markov table and/or print sentences.

    Options (read from ``sys.argv``):
        -h, --help      print the help text and exit with status 0
        --create-dic    build the table from the input log and pickle it
        -f FILE         input log used with --create-dic (default: markov.log)
        -o FILE         pickle written with --create-dic (default: out.pickle)
        -l FILE         pickle loaded when --create-dic is absent
                        (default: out.pickle)
        -n N            number of sentences to print (default: 1)

    Exits with status 2 when the command line is invalid or when the pickle
    file cannot be loaded.
    """
    # parse command line options
    try:
        opts, _ = getopt.getopt(
            sys.argv[1:], "hf:n:o:l:", ["help", "create-dic"])
    except getopt.GetoptError as err:
        print(err)
        sys.exit(2)

    create = False
    loadpk = "out.pickle"
    output = "out.pickle"
    infile = "markov.log"
    nb = 1
    for o, a in opts:
        if o == '--create-dic':
            create = True
        elif o in ('--help', '-h'):
            # The module may have no docstring; fall back to this function's
            # own documentation so that --help never prints "None".
            print(__doc__ or main.__doc__)
            sys.exit(0)
        elif o == "-o":
            output = a
        elif o == "-f":
            infile = a
        elif o == "-n":
            try:
                nb = int(a)
            except ValueError:
                print("-n expects an integer, got {!r}".format(a))
                sys.exit(2)
        elif o == "-l":
            loadpk = a

    if create:
        megadic = {}
        buildDic(megadic, infile)
        megacslist = megaComputeProbCSList(megadic)
        # `with` closes (and therefore flushes) the pickle file deterministically.
        with open(output, 'wb') as pickle_file:
            pickle.dump(megacslist, pickle_file)
    else:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load pickles produced by a trusted run of this script.
        try:
            with open(loadpk, 'rb') as pickle_file:
                megacslist = pickle.load(pickle_file)
        except Exception:
            # Any failure to open or decode the file is reported the same way.
            # Unlike a bare `except:`, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print("No pickle file provided")
            sys.exit(2)

    for _ in range(nb):
        print(megaRandomPic(megacslist))
|
||||
|
||||
def buildDic(dic, logfile):
    """Count next-word occurrences from a text file into ``dic``.

    Every line of ``logfile`` is split on whitespace and treated as one
    sentence. For each word, the pair formed by the two preceding words is
    used as the key under which the word is counted (via ``addInDic``). The
    pair starts as ``("__firstword__", "__firstword__")`` on every line, and
    the marker ``"__eol__"`` is counted after the last word of the line.
    A line without any word therefore counts ``"__eol__"`` directly after
    the start pair.

    Args:
        dic: dictionary updated in place.
        logfile: path of the text file to read.
    """
    # `with` guarantees the file is closed, even if counting raises.
    with open(logfile, "r") as log:
        for line in log:
            pre = ("__firstword__", "__firstword__")
            for word in line.split():
                addInDic(dic, word, pre)
                # Slide the two-word window forward by one word.
                pre = (pre[1], word)
            addInDic(dic, "__eol__", pre)
|
||||
|
||||
def addInDic(dic, w, pre):
    """Record one occurrence of ``w`` following the key ``pre``.

    Args:
        dic: dictionary mapping a key to a ``{word: count}`` dictionary;
            updated in place.
        w: the word whose count is incremented.
        pre: the key under which ``w`` is counted.
    """
    # Create the inner counter on first sight of `pre`, then bump the count.
    followers = dic.setdefault(pre, {})
    followers[w] = followers.get(w, 0) + 1
|
||||
|
||||
def megaComputeProbCSList(megadic):
    """Convert every count dictionary of ``megadic`` to a cumulative list.

    Args:
        megadic: dictionary mapping a key to a ``{word: count}`` dictionary.

    Returns:
        A new dictionary with the same keys, where each value is the result
        of ``computeProbCSList`` applied to the corresponding counts.
    """
    return {
        prefix: computeProbCSList(counts)
        for prefix, counts in megadic.items()
    }
|
||||
|
||||
def computeProbCSList(dic):
    """Turn a ``{word: count}`` dictionary into a cumulative probability list.

    Words are ordered by ascending count (ties keep the dictionary's
    iteration order, since ``sorted`` is stable). Each entry pairs a word
    with the fraction of the total count reached once that word is included.

    Args:
        dic: dictionary mapping words to occurrence counts.

    Returns:
        A list of ``(word, cumulative_probability)`` tuples; empty when
        ``dic`` is empty.
    """
    total = sum(dic.values())
    ranked = sorted(dic.items(), key=lambda item: item[1])
    cumulative = []
    running = 0
    for word, count in ranked:
        # Accumulate the raw counts and divide once per entry: summing the
        # per-word fractions instead lets rounding errors pile up and can
        # leave the last value just below 1.0. Here the last entry is
        # total / total, i.e. exactly 1.0.
        running += count
        cumulative.append((word, running / total))
    return cumulative
|
||||
|
||||
def megaRandomPic(megacsdic):
    """Generate one random sentence by walking the table ``megacsdic``.

    The walk starts from the key ``("__firstword__", "__firstword__")``. At
    each step a word is drawn with ``randomPick`` from the list stored under
    the current key, and the key becomes (previous second element, drawn
    word). The walk stops when ``"__eol__"`` is drawn.

    Args:
        megacsdic: dictionary mapping a pair of words to the list passed to
            ``randomPick``.

    Returns:
        The drawn words, each followed by a single space (so a non-empty
        sentence ends with a trailing space); ``""`` if the first draw ends
        the sentence.

    Raises:
        KeyError: if a key reached during the walk is missing from
            ``megacsdic``.
    """
    index = ("__firstword__", "__firstword__")
    words = []
    currentw = randomPick(megacsdic[index])
    # randomPick may return None when no entry matches the draw; treat that
    # as the end of the sentence instead of failing on `str + None`.
    while currentw is not None and currentw != "__eol__":
        words.append(currentw)
        index = (index[1], currentw)
        currentw = randomPick(megacsdic[index])
    # Joining once avoids repeated string concatenation while keeping the
    # original "word + space" output format.
    return "".join(word + " " for word in words)
|
||||
|
||||
def randomPick(CSlist):
    """Draw one word from a cumulative probability list.

    Args:
        CSlist: iterable of ``(word, cumulative_threshold)`` tuples, in the
            order in which the thresholds should be tested.

    Returns:
        The first word whose threshold is strictly greater than a value
        drawn from ``random.random()``. If no threshold exceeds the draw,
        the last word is returned. ``None`` is returned only when ``CSlist``
        is empty.
    """
    r = random.random()
    word = None
    for word, threshold in CSlist:
        if r < threshold:
            return word
    # No threshold exceeded the draw (e.g. the final cumulative value fell
    # short of 1.0 through rounding): fall back to the last word seen, which
    # is still None for an empty list.
    return word
|
||||
|
||||
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user