Add distribution & markov

This commit is contained in:
Feufochmar 2019-11-04 17:22:49 +01:00
commit 9c93990b21
2 changed files with 238 additions and 0 deletions

130
distribution.rkt Normal file
View File

@ -0,0 +1,130 @@
#lang racket/base
(provide
make-distribution
distribution-pick-from
distribution-add-to!
distribution-contains?
distribution-check-only
distribution-linear-combination
distribution-items
distribution->jsexpr
jsexpr->distribution)
; Distribution structure
; A distribution associates each item with a number giving its frequency.
; When an item is picked from the distribution, these frequencies are used as weights.
(struct distribution
([total #:mutable] ; sum of the occurrence counts of all the items in the distribution
[lst-items #:mutable] ; same content as items, but as an assoc-list of (item . count) pairs
items)) ; hash item -> number of occurrences
; Record extra occurrences of an item in a distribution.
;   d    : the distribution to mutate
;   itm  : the item to count (it starts from a count of 0 when not yet present)
;   additionnal-occurences : how many occurrences to add, 1 by default
; The item table, the assoc-list copy of it and the total are all brought up to date.
(define (distribution-add-to! d itm [additionnal-occurences 1])
  (define table (distribution-items d))
  (hash-update! table itm (lambda (count) (+ count additionnal-occurences)) 0)
  ; the assoc-list is rebuilt from the table after every change
  (set-distribution-lst-items! d (hash->list table))
  (set-distribution-total! d (+ (distribution-total d) additionnal-occurences)))
; Syntax to build a distribution from items and their occurrences.
; Note: in the (id val) clauses the item id is quoted (not evaluated); val is an expression.
; The clauses are tried in order and the first matching pattern is used.
; NOTE(review): a quoted list literal such as '(a b) reads as (quote (a b)), a two-element
; form, so it looks like it would match the (id val) clause instead of the lst clauses.
; Confirm that callers always pass lst as a variable or as a longer expression.
(define-syntax make-distribution
(syntax-rules (*)
; No arguments - default constructor: total of 0, empty assoc-list, empty mutable hash
((make-distribution)
(distribution 0 (list) (make-hash)))
; Default syntax: each (id val) adds val occurrences of the quoted id
((make-distribution (id val) ...)
(let ((dist (make-distribution)))
(begin
(distribution-add-to! dist (quote id) val) ...)
dist))
; From a list and a default value: every element of lst is added with default-val occurrences
; (an element present several times in lst is added once per appearance)
((make-distribution lst (* default-val))
(let ((dist (make-distribution)))
(for-each
(lambda (x) (distribution-add-to! dist x default-val))
lst)
dist))
; From a list, a default value and values overriding the default.
; The counts are first collected in a temporary hash, so an (id val) entry replaces
; the default count of that item instead of being added to it.
((make-distribution lst (* default-val) (id val) ...)
(let ((h (make-hash))
(dist (make-distribution)))
(for-each
(lambda (x) (hash-set! h x default-val))
lst)
(begin
(hash-set! h (quote id) val) ...)
(hash-for-each h (lambda (k v) (distribution-add-to! dist k v)))
dist))
; With a useless list: lst is accepted but ignored, only the (id val) entries are added
((make-distribution lst (id val) ...)
(let ((dist (make-distribution)))
(begin
(distribution-add-to! dist (quote id) val) ...)
dist))
))
; Pick a random item from a distribution, each item being weighted by its count.
;   d : the distribution to pick from
; Returns the picked item.
; Raises an argument error when the total of the distribution is not a positive
; exact integer (for instance when the distribution is empty): there is nothing
; to pick from, and `random` would reject such a bound anyway.
(define (distribution-pick-from d)
  (define total (distribution-total d))
  (unless (exact-positive-integer? total)
    (raise-argument-error 'distribution-pick-from
                          "distribution with a positive exact integer total"
                          d))
  ; roll is in [0, total); walk the assoc-list until the running sum exceeds it
  (define roll (random total))
  (let search ((remaining (distribution-lst-items d))
               (subtotal 0))
    (let ((new-subtotal (+ subtotal (cdar remaining))))
      (if (< roll new-subtotal)
          (caar remaining)
          (search (cdr remaining) new-subtotal)))))
; Tell whether an item is present in a distribution.
; Returns #t when itm is a key of the item table of d, #f otherwise.
(define (distribution-contains? d itm)
  (let ((table (distribution-items d)))
    (hash-has-key? table itm)))
; Check that a distribution only contains known items.
;   d           : the distribution to inspect
;   known-items : list of the accepted items (compared with `member`)
; Raises an error on the first item found that is not in known-items.
(define (distribution-check-only d known-items)
  (define (reject-unknown itm count)
    (unless (member itm known-items)
      (error "Distribution contains an unknown item: " itm)))
  (hash-for-each (distribution-items d) reject-unknown))
; Linear combination of distributions.
;   dists : list of pairs (distribution . weight)
; Returns a new distribution. For each input distribution, every item is added
; with a count of weight * (count / total of its distribution) * 100, truncated.
; The count is converted to an exact integer, so inexact (floating-point) weights
; do not leave an inexact total in the result: `random`, which is used to pick
; from a distribution, only accepts exact integers.
(define (distribution-linear-combination dists)
  (define result (make-distribution))
  (for-each
    (lambda (entry)
      (define source (car entry))
      (define weight (cdr entry))
      (define source-total (distribution-total source))
      (hash-for-each
        (distribution-items source)
        (lambda (itm count)
          ; share is the relative frequency of itm in its source distribution
          (define share (/ count source-total))
          ; * 100 and truncate because share is a fraction between 0 and 1
          (distribution-add-to!
            result
            itm
            (inexact->exact (truncate (* weight share 100)))))))
    dists)
  result)
; Serialise a distribution to a json-compatible structure.
;   dist         : the distribution to serialise
;   #:translator : function applied to each item, for converting values that are
;                  not compatible with json (identity by default)
; Returns a list with one immutable hash per item, with the keys 'value
; (the translated item) and 'count (its number of occurrences).
(define (distribution->jsexpr dist #:translator [tr (lambda (x) x)])
  (define (entry->jsexpr itm count)
    (hash 'value (tr itm)
          'count count))
  (hash-map (distribution-items dist) entry->jsexpr))
; Rebuild a distribution from a json-compatible structure.
;   js           : list of hashes with the keys 'value and 'count
;   #:translator : function applied to each 'value, for converting values that are
;                  not compatible with json (identity by default)
; Returns a new distribution holding every translated value with its count.
(define (jsexpr->distribution js #:translator [tr (lambda (x) x)])
  (let ((result (make-distribution)))
    (for ((entry (in-list js)))
      (let* ((itm (tr (hash-ref entry 'value)))
             (count (hash-ref entry 'count)))
        (distribution-add-to! result itm count)))
    result))

108
markov.rkt Normal file
View File

@ -0,0 +1,108 @@
#lang racket/base
(require
racket/list
"distribution.rkt")
(provide make-markov
markov-add-example!
markov-add-next-item!
markov-generate
markov->jsexpr
jsexpr->markov)
; Markov chain structure
(struct markov
(order ; order of the chain: length of the sequences used as keys of next-items
next-items)) ; hash table: given sequence (list of items) -> distribution of the next possible items
; Constructor
; Build an empty markov chain of the given order (1 by default),
; with a fresh mutable hash as its table of next items.
(define (make-markov [order 1])
  (let ((transitions (make-hash)))
    (markov order transitions)))
; Add an example to the generator. The example is a sequence of values in a list.
; The chain is walked with a sliding context that starts as a list of
; (markov-order mrk) #f values; at each step the item following the context is
; counted in the distribution associated with that context.
; #f is recorded as the following item once the example is exhausted, which marks the end.
(define (markov-add-example! mrk example)
  (define table (markov-next-items mrk))
  (let loop ((remaining example)
             (context (make-list (markov-order mrk) #f)))
    (define following (if (null? remaining) #f (car remaining)))
    ; get the distribution of this context, creating it on first use
    (distribution-add-to!
      (hash-ref! table context (lambda () (make-distribution)))
      following)
    (when following
      ; slide the context: drop its oldest item, append the one just seen
      (loop (cdr remaining) (append (cdr context) (list following))))))
; Add a possible next item after a sequence.
; Use this to regenerate a markov chain from a serialized version (i.e. not from examples).
;   mrk        : the chain to mutate
;   seq        : the sequence used as key in the table of next items
;   next       : the item that can follow seq
;   occurences : how many occurrences of next to record, 1 by default
(define (markov-add-next-item! mrk seq next [occurences 1])
  (define table (markov-next-items mrk))
  ; get the distribution of seq, creating it on first use
  (define distro (hash-ref! table seq (lambda () (make-distribution))))
  (distribution-add-to! distro next occurences))
; Generate a list with the given initial sequence.
;   next-items : hash table sequence -> distribution of next items
;   result     : items generated so far, most recent first
;   previous   : current sequence, used as key in next-items
; Items are picked until #f is picked; the accumulated items are then
; returned in generation order.
(define (generate-list next-items result previous)
  (let loop ((acc result)
             (context previous))
    (define picked (distribution-pick-from (hash-ref next-items context)))
    (cond
      ((not picked) (reverse acc))
      (else
       ; slide the context: drop its oldest item, append the one just picked
       (loop (cons picked acc) (append (cdr context) (list picked)))))))
; Get a suitable starting sequence from the given list.
;   next-items : hash table whose keys are the known sequences
;   initial    : the sequence the caller would like to start from
; Returns initial itself when it is a key of next-items. Otherwise returns the
; key that matches initial best, comparing both from their last elements;
; ties are broken randomly (the keys are shuffled and the first best one is kept).
; Raises an argument error when initial is unknown and next-items is empty,
; as there is then no sequence to choose from.
(define (find-starting-sequence next-items initial)
  ; rating function to compare two sequences: number of positions holding
  ; equal items, up to the end of the shorter list
  (define (rate a b)
    (for/sum ((x (in-list a))
              (y (in-list b)))
      (if (equal? x y) 1 0)))
  (cond
    ; If the given sequence is a suitable sequence, return it
    ((hash-has-key? next-items initial) initial)
    ((zero? (hash-count next-items))
     (raise-argument-error 'find-starting-sequence
                           "hash with at least one sequence"
                           next-items))
    (else
     ; The starting sequence is more suitable if we compare from the last elements
     (let ((wanted (reverse initial)))
       ; argmax keeps the first best-rated element of the shuffled keys
       (argmax
         (lambda (seq) (rate (reverse seq) wanted))
         (shuffle (hash-keys next-items)))))))
; Generate a list from the chain.
;   mrk     : the markov chain
;   initial : optional list of items the result starts with; #f (the default) for none
; Without initial, generation starts from a sequence of (markov-order mrk) #f values.
; With initial, generation starts from the known sequence found for initial, and
; the accumulator is seeded with the items of initial.
(define (markov-generate mrk [initial #f])
  (define table (markov-next-items mrk))
  (cond
    (initial
     (let ((start (find-starting-sequence table initial)))
       ; the accumulator of generate-list is kept most recent item first
       (generate-list table (reverse initial) start)))
    (else
     (generate-list table '() (make-list (markov-order mrk) #f)))))
; Serialise a markov chain to a json-compatible structure.
;   mrk          : the chain to serialise
;   #:translator : function applied to each item, for converting values that are
;                  not compatible with json (identity by default)
; Returns an immutable hash with the keys 'order (the order of the chain) and
; 'next (a list of hashes, one per known sequence, with the keys 'sequence and
; 'distribution).
(define (markov->jsexpr mrk #:translator [tr (lambda (x) x)])
  (define (transition->jsexpr seq distro)
    (hash 'sequence (map tr seq)
          'distribution (distribution->jsexpr distro #:translator tr)))
  (hash 'order (markov-order mrk)
        'next (hash-map (markov-next-items mrk) transition->jsexpr)))
; Rebuild a markov chain from a json-compatible structure.
;   js           : hash with the keys 'order and 'next, where 'next is a list of
;                  hashes with the keys 'sequence and 'distribution
;   #:translator : function applied to each item, for converting values that are
;                  not compatible with json (identity by default)
; Returns a new markov chain holding one distribution per translated sequence.
(define (jsexpr->markov js #:translator [tr (lambda (x) x)])
  (let* ((mrk (make-markov (hash-ref js 'order)))
         (table (markov-next-items mrk)))
    (for ((entry (in-list (hash-ref js 'next))))
      (let* ((seq (map tr (hash-ref entry 'sequence)))
             (distro (jsexpr->distribution (hash-ref entry 'distribution)
                                           #:translator tr)))
        (hash-set! table seq distro)))
    mrk))