Add distrbution & markov
This commit is contained in:
commit
9c93990b21
|
@ -0,0 +1,130 @@
|
||||||
|
#lang racket/base
|
||||||
|
|
||||||
|
(provide
|
||||||
|
make-distribution
|
||||||
|
distribution-pick-from
|
||||||
|
distribution-add-to!
|
||||||
|
distribution-contains?
|
||||||
|
distribution-check-only
|
||||||
|
distribution-linear-combination
|
||||||
|
distribution-items
|
||||||
|
distribution->jsexpr
|
||||||
|
jsexpr->distribution)
|
||||||
|
|
||||||
|
; Distribution structure
|
||||||
|
; Distributions are list of items associated to a number indicating its frequency.
|
||||||
|
; When an element is picked from the list, the frequency of the items is taken into account.
|
||||||
|
(struct distribution
|
||||||
|
([total #:mutable] ; total number of elements in the distribution
|
||||||
|
[lst-items #:mutable] ; same as items, but in a assoc-list
|
||||||
|
items)) ; hash item -> number of occurences
|
||||||
|
|
||||||
|
; Add an item to the distribution, with its number of occurences
|
||||||
|
(define (distribution-add-to! d itm [additionnal-occurences 1])
|
||||||
|
(let ((occurences (hash-ref (distribution-items d) itm 0)))
|
||||||
|
(hash-set! (distribution-items d) itm (+ occurences additionnal-occurences))
|
||||||
|
(set-distribution-lst-items! d (hash->list (distribution-items d)))
|
||||||
|
(set-distribution-total! d (+ (distribution-total d) additionnal-occurences))
|
||||||
|
))
|
||||||
|
|
||||||
|
; Syntax to build a distribution from the items and theirs occurences
|
||||||
|
; Note: the item is quoted
|
||||||
|
(define-syntax make-distribution
|
||||||
|
(syntax-rules (*)
|
||||||
|
; No arguments - default constructor
|
||||||
|
((make-distribution)
|
||||||
|
(distribution 0 (list) (make-hash)))
|
||||||
|
; default syntax
|
||||||
|
((make-distribution (id val) ...)
|
||||||
|
(let ((dist (make-distribution)))
|
||||||
|
(begin
|
||||||
|
(distribution-add-to! dist (quote id) val) ...)
|
||||||
|
dist))
|
||||||
|
; From a list and default values
|
||||||
|
((make-distribution lst (* default-val))
|
||||||
|
(let ((dist (make-distribution)))
|
||||||
|
(for-each
|
||||||
|
(lambda (x) (distribution-add-to! dist x default-val))
|
||||||
|
lst)
|
||||||
|
dist))
|
||||||
|
; From a list, default values and values overriding the defaults
|
||||||
|
((make-distribution lst (* default-val) (id val) ...)
|
||||||
|
(let ((h (make-hash))
|
||||||
|
(dist (make-distribution)))
|
||||||
|
(for-each
|
||||||
|
(lambda (x) (hash-set! h x default-val))
|
||||||
|
lst)
|
||||||
|
(begin
|
||||||
|
(hash-set! h (quote id) val) ...)
|
||||||
|
(hash-for-each h (lambda (k v) (distribution-add-to! dist k v)))
|
||||||
|
dist))
|
||||||
|
; With a useless list
|
||||||
|
((make-distribution lst (id val) ...)
|
||||||
|
(let ((dist (make-distribution)))
|
||||||
|
(begin
|
||||||
|
(distribution-add-to! dist (quote id) val) ...)
|
||||||
|
dist))
|
||||||
|
))
|
||||||
|
|
||||||
|
; Pick from a distribution
|
||||||
|
(define (distribution-pick-from d)
|
||||||
|
(letrec ((search
|
||||||
|
(lambda (lst subtotal roll)
|
||||||
|
(let ((new-subtotal (+ subtotal (cdar lst))))
|
||||||
|
(if (< roll new-subtotal)
|
||||||
|
(caar lst)
|
||||||
|
(search (cdr lst) new-subtotal roll)))
|
||||||
|
)))
|
||||||
|
(search (distribution-lst-items d) 0 (random (distribution-total d)))
|
||||||
|
))
|
||||||
|
|
||||||
|
; Check if an item is in the distribution
|
||||||
|
(define (distribution-contains? d itm)
|
||||||
|
(hash-has-key? (distribution-items d) itm))
|
||||||
|
|
||||||
|
; Check if there are unkown items in the distribution
|
||||||
|
; Raise an error if an unkown item is present
|
||||||
|
(define (distribution-check-only d known-items)
|
||||||
|
(hash-for-each
|
||||||
|
(distribution-items d)
|
||||||
|
(lambda (k v)
|
||||||
|
(when (not (member k known-items))
|
||||||
|
(error "Distribution contains an unknown item: " k)))))
|
||||||
|
|
||||||
|
; Linear combination of distributions
|
||||||
|
; Make a new distribution from a dotted list of distributions and their weight
|
||||||
|
(define (distribution-linear-combination dists)
|
||||||
|
(let ((d (make-distribution)))
|
||||||
|
(for-each
|
||||||
|
(lambda (x)
|
||||||
|
(let ((itms
|
||||||
|
(hash-map
|
||||||
|
(distribution-items (car x))
|
||||||
|
(lambda (k v)
|
||||||
|
(cons k (/ v (distribution-total (car x))))))))
|
||||||
|
(for-each
|
||||||
|
(lambda (itm)
|
||||||
|
(distribution-add-to! d (car itm) (truncate (* (cdr x) (cdr itm) 100)))) ; * 100 and truncate because (cdr itm) is reduced
|
||||||
|
itms)))
|
||||||
|
dists)
|
||||||
|
d))
|
||||||
|
|
||||||
|
; Serialisation to a json-compatible structure
|
||||||
|
; It is possible to pass a translator function for converting values that are not compatible to json
|
||||||
|
(define (distribution->jsexpr dist #:translator [tr (lambda (x) x)])
|
||||||
|
(hash-map
|
||||||
|
(distribution-items dist)
|
||||||
|
(lambda (k v)
|
||||||
|
(make-immutable-hash
|
||||||
|
`((value . ,(tr k))
|
||||||
|
(count . ,v))))))
|
||||||
|
|
||||||
|
; Deserialisation from a json-compatible structure
|
||||||
|
; It is possible to pass a translator function for converting values that are not compatible to json
|
||||||
|
(define (jsexpr->distribution js #:translator [tr (lambda (x) x)])
|
||||||
|
(define dist (make-distribution))
|
||||||
|
(for-each
|
||||||
|
(lambda (x)
|
||||||
|
(distribution-add-to! dist (tr (hash-ref x 'value)) (hash-ref x 'count)))
|
||||||
|
js)
|
||||||
|
dist)
|
|
@ -0,0 +1,108 @@
|
||||||
|
#lang racket/base
|
||||||
|
|
||||||
|
(require
|
||||||
|
racket/list
|
||||||
|
"distribution.rkt")
|
||||||
|
(provide make-markov
|
||||||
|
markov-add-example!
|
||||||
|
markov-add-next-item!
|
||||||
|
markov-generate
|
||||||
|
markov->jsexpr
|
||||||
|
jsexpr->markov)
|
||||||
|
|
||||||
|
; Markov chain structure
|
||||||
|
(struct markov
|
||||||
|
(order ; order of the chain
|
||||||
|
next-items)) ; table of given sequence -> distributions of next possible items
|
||||||
|
|
||||||
|
; Constructor
|
||||||
|
(define (make-markov [order 1])
|
||||||
|
(markov order (make-hash)))
|
||||||
|
|
||||||
|
; Add a example to the generator. The example is a sequence of values in a list.
|
||||||
|
(define (markov-add-example! mrk example)
|
||||||
|
(letrec
|
||||||
|
((fill
|
||||||
|
(lambda (lst prev)
|
||||||
|
(let ((distro (hash-ref (markov-next-items mrk) prev #f))
|
||||||
|
(next (if (null? lst) #f (car lst))))
|
||||||
|
(if distro
|
||||||
|
(distribution-add-to! distro next)
|
||||||
|
(let ((new-distro (make-distribution)))
|
||||||
|
(distribution-add-to! new-distro next)
|
||||||
|
(hash-set! (markov-next-items mrk) prev new-distro)))
|
||||||
|
(when next (fill (cdr lst) (append (cdr prev) (list next))))
|
||||||
|
))))
|
||||||
|
(fill example (make-list (markov-order mrk) #f))
|
||||||
|
))
|
||||||
|
|
||||||
|
; Add a possible next item from a sequence
|
||||||
|
; Use this to regenerate a markov chain from a serialized version (ie not from examples)
|
||||||
|
(define (markov-add-next-item! mrk seq next [occurences 1])
|
||||||
|
(let ((distro (hash-ref (markov-next-items mrk) seq #f)))
|
||||||
|
(if distro
|
||||||
|
(distribution-add-to! distro next occurences)
|
||||||
|
(let ((new-distro (make-distribution)))
|
||||||
|
(distribution-add-to! new-distro next occurences)
|
||||||
|
(hash-set! (markov-next-items mrk) seq new-distro)))))
|
||||||
|
|
||||||
|
; Generate a list with the given initial sequence
|
||||||
|
(define (generate-list next-items result previous)
|
||||||
|
(let ((next (distribution-pick-from (hash-ref next-items previous))))
|
||||||
|
(if (not next)
|
||||||
|
(reverse result)
|
||||||
|
(generate-list next-items (cons next result) (append (cdr previous) (list next))))))
|
||||||
|
|
||||||
|
; Get a suitable starting sequence from the given list
|
||||||
|
(define (find-starting-sequence next-items initial)
|
||||||
|
; If the given sequence is a suitable sequence, return it
|
||||||
|
(if (hash-has-key? next-items initial)
|
||||||
|
initial
|
||||||
|
(letrec
|
||||||
|
; rating function to compare two sequences
|
||||||
|
((rate
|
||||||
|
(lambda (result a b)
|
||||||
|
(if (or (null? a) (null? b))
|
||||||
|
result
|
||||||
|
(rate (+ result (if (equal? (car a) (car b)) 1 0)) (cdr a) (cdr b))))))
|
||||||
|
; the suitable starting sequence is the one that get the best result with the rate function when compared to the initial sequence
|
||||||
|
(caar
|
||||||
|
(sort
|
||||||
|
(map
|
||||||
|
(lambda (lst) (cons lst (rate 0 (reverse lst) (reverse initial)))) ; The starting sequence is more suitable is we start from the last elements
|
||||||
|
(shuffle
|
||||||
|
(hash-keys next-items)))
|
||||||
|
(lambda (a b) (> (cdr a) (cdr b))))))))
|
||||||
|
|
||||||
|
; Generate a list from the chain
|
||||||
|
(define (markov-generate mrk [initial #f])
|
||||||
|
(if initial
|
||||||
|
(let ((prev (find-starting-sequence (markov-next-items mrk) initial)))
|
||||||
|
(generate-list (markov-next-items mrk) (reverse initial) prev))
|
||||||
|
(generate-list (markov-next-items mrk) (list) (make-list (markov-order mrk) #f))))
|
||||||
|
|
||||||
|
; Serialisation to a json-compatible structure
|
||||||
|
; It is possible to pass a translator function for converting values that are not compatible to json
|
||||||
|
(define (markov->jsexpr mrk #:translator [tr (lambda (x) x)])
|
||||||
|
(make-immutable-hash
|
||||||
|
`((order . ,(markov-order mrk))
|
||||||
|
(next . ,(hash-map
|
||||||
|
(markov-next-items mrk)
|
||||||
|
(lambda (k v)
|
||||||
|
(make-immutable-hash
|
||||||
|
`((sequence . ,(map tr k))
|
||||||
|
(distribution . ,(distribution->jsexpr v #:translator tr)))))))
|
||||||
|
)))
|
||||||
|
|
||||||
|
; Deserialisation from a json-compatible structure
|
||||||
|
; It is possible to pass a translator function for converting values that are not compatible to json
|
||||||
|
(define (jsexpr->markov js #:translator [tr (lambda (x) x)])
|
||||||
|
(define mrk (make-markov (hash-ref js 'order)))
|
||||||
|
(for-each
|
||||||
|
(lambda (x)
|
||||||
|
(hash-set!
|
||||||
|
(markov-next-items mrk)
|
||||||
|
(map tr (hash-ref x 'sequence))
|
||||||
|
(jsexpr->distribution (hash-ref x 'distribution) #:translator tr)))
|
||||||
|
(hash-ref js 'next))
|
||||||
|
mrk)
|
Loading…
Reference in New Issue