#!/usr/bin/python
# -*- coding: utf-8 -*-
# \file markov.py
# \brief Second-order Markov chain sentence generator trained on a text log
# \author Florent Guiotte
# \version 0.2
# \date 04 sept. 2017
#
"""Generate random sentences with a second-order Markov chain.

Usage:
    markov.py --create-dic [-f LOGFILE] [-o PICKLE] [-n COUNT]
    markov.py [-l PICKLE] [-n COUNT]

Options:
    --create-dic  Build the model from LOGFILE and save it to PICKLE
                  before generating sentences.
    -f LOGFILE    Training text, one sentence per line (default: markov.log).
    -o PICKLE     File the model is written to (default: out.pickle).
    -l PICKLE     File the model is loaded from (default: out.pickle).
    -n COUNT      Number of sentences to print (default: 1).
    -h, --help    Show this message and exit.
"""

import sys
import random
import pickle
import getopt

# Sentinel tokens: padding for the two-word context at the start of a
# line, and the marker recorded after the last word of a line.
_FIRST_WORD = "__firstword__"
_EOL = "__eol__"


def main(argv=None):
    """Parse the command line, then build or load a model and print sentences.

    Args:
        argv: Argument list without the program name.  Defaults to
            ``sys.argv[1:]`` so existing ``main()`` callers are unaffected.

    Exits with status 2 on a malformed command line or when the model
    pickle cannot be loaded, and with status 0 after printing the help.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _args = getopt.getopt(argv, "hf:n:o:l:",
                                    ["help", "create-dic"])
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    create = False
    loadpk = "out.pickle"
    output = "out.pickle"
    infile = "markov.log"
    nb = 1

    for o, a in opts:
        if o == "--create-dic":
            create = True
        elif o in ("--help", "-h"):
            print(__doc__)
            sys.exit(0)
        elif o == "-o":
            output = a
        elif o == "-f":
            infile = a
        elif o == "-n":
            try:
                nb = int(a)
            except ValueError:
                sys.stderr.write("-n expects an integer, got %r\n" % a)
                sys.exit(2)
        elif o == "-l":
            loadpk = a

    if create:
        megadic = {}
        buildDic(megadic, infile)
        megacslist = megaComputeProbCSList(megadic)
        with open(output, "wb") as out:
            pickle.dump(megacslist, out)
    else:
        # SECURITY: unpickling executes arbitrary code embedded in the
        # file; only load model files that come from a trusted source.
        try:
            with open(loadpk, "rb") as src:
                megacslist = pickle.load(src)
        except (IOError, OSError, EOFError, pickle.UnpicklingError,
                AttributeError, ImportError, IndexError):
            # Missing, unreadable or corrupt model file.
            print("No pickle file provided")
            sys.exit(2)

    for _ in range(nb):
        print(megaRandomPic(megacslist))


def buildDic(dic, logfile):
    """Count, for every pair of consecutive words, the words that follow it.

    Each line of ``logfile`` is split on whitespace.  The context starts
    as two ``__firstword__`` tokens and an ``__eol__`` token is recorded
    after the last word of every line (including empty lines).

    Args:
        dic: Dict updated in place, mapping ``(word1, word2)`` to a dict
            of ``{next_word: count}``.
        logfile: Path of the text file to read.
    """
    with open(logfile, "r") as log:
        for line in log:
            pre = (_FIRST_WORD, _FIRST_WORD)
            for word in line.split():
                addInDic(dic, word, pre)
                pre = (pre[1], word)
            addInDic(dic, _EOL, pre)


def addInDic(dic, w, pre):
    """Increment the number of times word ``w`` followed context ``pre``.

    Args:
        dic: Dict of ``{context: {word: count}}``, updated in place.
        w: The word observed after the context.
        pre: The context, a hashable key (a two-word tuple in this file).
    """
    followers = dic.setdefault(pre, {})
    followers[w] = followers.get(w, 0) + 1


def megaComputeProbCSList(megadic):
    """Convert every context's follower counts to a cumulative list.

    Args:
        megadic: Dict of ``{context: {word: count}}``.

    Returns:
        Dict mapping each context to the list produced by
        ``computeProbCSList`` for its follower counts.
    """
    return {pre: computeProbCSList(followers)
            for pre, followers in megadic.items()}


def computeProbCSList(dic):
    """Turn ``{word: count}`` into a cumulative probability list.

    Words are ordered by ascending count.  The integer counts are
    accumulated and divided once by the total, instead of summing
    per-word fractions, so the last entry is exactly 1.0 and rounding
    error cannot leave an unreachable gap below 1.

    Args:
        dic: Dict of ``{word: count}``.

    Returns:
        List of ``(word, cumulative_probability)`` tuples; empty when
        ``dic`` is empty.
    """
    # float() keeps the division a true division on Python 2 as well.
    total = float(sum(dic.values()))
    running = 0
    cumulative = []
    for word, count in sorted(dic.items(), key=lambda item: item[1]):
        running += count
        cumulative.append((word, running / total))
    return cumulative


def megaRandomPic(megacsdic):
    """Generate one random sentence from the model.

    Starting from the ``(__firstword__, __firstword__)`` context, words
    are drawn with ``randomPick`` until ``__eol__`` comes up.

    Args:
        megacsdic: Model as returned by ``megaComputeProbCSList``.

    Returns:
        The generated words, each followed by a single space (so a
        non-empty sentence ends with a space).  An empty string is
        returned when the model has no start context, e.g. when it was
        built from an empty file.
    """
    index = (_FIRST_WORD, _FIRST_WORD)
    if index not in megacsdic:
        return ""

    pieces = []
    currentw = randomPick(megacsdic[index])
    while currentw != _EOL:
        pieces.append(currentw + " ")
        index = (index[1], currentw)
        currentw = randomPick(megacsdic[index])
    return "".join(pieces)


def randomPick(CSlist):
    """Draw one word from a cumulative probability list.

    Args:
        CSlist: List of ``(word, cumulative_probability)`` tuples in
            ascending cumulative order.

    Returns:
        The first word whose cumulative probability exceeds a uniform
        draw from ``random.random()``.  If no entry does (the final
        value is slightly below 1.0 because of rounding, as in models
        saved by earlier versions), the last word is returned.  ``None``
        is returned only for an empty list.
    """
    r = random.random()
    for word, threshold in CSlist:
        if r < threshold:
            return word
    return CSlist[-1][0] if CSlist else None


if __name__ == "__main__":
    main()