﻿
//
// xspeak.js -- corpspeak, webspeak, ... speech generator
//
// Copyright 2007 Chris Pirazzi chris@pirazzi.net
// Except sprintf() which is public domain from Ash Searle (see below)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// 
// You can find a copy of the GNU Affero General Public License
// at <http://lurkertech.com/agpl-3.0.txt> and also at
// <http://www.gnu.org/licenses/>.
//

//
// developer-facing sanity check.  when val is falsy, break into the
// debugger (if one is attached) and pop up an alert.  does NOT throw,
// so execution continues after a failed assertion.
//
function assert(val)
{
    if (val)
        return;

    debugger;
    alert('assertion failure');
}

//
// user-facing validation: throws an Error carrying message
// whenever val is falsy.  returns nothing otherwise.
//
function check_user(val, message)
{
    if (val)
        return;

    throw new Error(message);
}

// crude browser sniffing flags, computed once at load time.
// the user-agent tests are case-insensitive substring matches;
// the others probe for legacy DOM features.
var user_agent = navigator.userAgent.toLowerCase();
var n4 = document.layers;
var ie = document.all;
var n6 = (document.getElementById && !document.all);
var o6 = (user_agent.indexOf("opera") >= 0);
var safari = (user_agent.indexOf("safari") >= 0);
// opera can advertise "msie" in its user agent, so exclude it explicitly
var msie = (user_agent.indexOf("msie") >= 0) &&
           (user_agent.indexOf("opera") < 0);

// sprintf from Ash Searle 2007.04.27 http://hexmen.com/js/sprintf.js
//
// sprintf(format, arg1, arg2, ...) -- returns the formatted string.
//
// each conversion has the form
//     %[index$][flags][width][.precision]type
// where
//   index$    selects an argument explicitly (1-based, since argument 0
//             is the format string itself)
//   flags     any of: ' ' '+' (positive prefix), '-' (left justify),
//             '0' (zero pad), '#' (0b/0/0x prefix for b/o/x/X)
//   width     digits, '*' (take from next argument) or '*N$'
//   precision digits, '*' or '*N$'
//   type      one of s c b o x X u i d f F e E g G
// '%%' yields a literal '%'.  unrecognized sequences are left untouched.
//
// throws Error if a width evaluates to a non-finite number.
//
function sprintf()
{
    // pads str with chr up to len characters, on the right if
    // leftJustify, else on the left.  a non-numeric len yields no padding.
    function pad(str, len, chr, leftJustify)
    {
        var padding = (str.length >= len) ? '' : Array(1 + len - str.length >>> 0).join(chr);
        return leftJustify ? str + padding : padding + str;
    }

    // widens value to minWidth.  zero padding is inserted after the
    // sign/base prefix so that e.g. "-42" becomes "-0042".
    function justify(value, prefix, leftJustify, minWidth, zeroPad)
    {
        var diff = minWidth - value.length;
        if (diff > 0)
        {
            if (leftJustify || !zeroPad)
                value = pad(value, minWidth, ' ', leftJustify);
            else
                value = value.slice(0, prefix.length) + pad('', diff, '0', true) + value.slice(prefix.length);
        }
        return value;
    }

    // formats value as an unsigned 32-bit integer in the given base.
    function formatBaseX(value, base, prefix, leftJustify, minWidth, precision, zeroPad)
    {
        // Note: casts negative numbers to positive ones
        var number = value >>> 0;
        // the base prefix is only emitted for '#' and a nonzero number
        prefix = prefix && number && {'2': '0b', '8': '0', '16': '0x'}[base] || '';
        value = prefix + pad(number.toString(base), precision || 0, '0', false);
        return justify(value, prefix, leftJustify, minWidth, zeroPad);
    }

    // formats a string; precision (if given) truncates it.
    function formatString(value, leftJustify, minWidth, precision, zeroPad)
    {
        if (precision != null)
            value = value.slice(0, precision);
        return justify(value, '', leftJustify, minWidth, zeroPad);
    }

    var a = arguments, i = 0, format = a[i++];
    return format.replace(sprintf.regex, function(substring, valueIndex, flags, minWidth, _, precision, type) {
        if (substring == '%%') return '%';

        // parse flags
        var leftJustify = false, positivePrefix = '', zeroPad = false, prefixBaseX = false;
        for (var j = 0; flags && j < flags.length; j++)
        {
            switch (flags.charAt(j))
            {
            case ' ': positivePrefix = ' '; break;
            case '+': positivePrefix = '+'; break;
            case '-': leftJustify = true; break;
            case '0': zeroPad = true; break;
            case '#': prefixBaseX = true; break;
            }
        }

        // parameters may be null, undefined, empty-string or real valued
        // we want to ignore null, undefined and empty-string values

        if (!minWidth)
            minWidth = 0;
        else if (minWidth == '*')
            minWidth = +a[i++];
        else if (minWidth.charAt(0) == '*')
            minWidth = +a[minWidth.slice(1, -1)];
        else
            minWidth = +minWidth;

        // Note: undocumented perl feature:
        if (minWidth < 0)
        {
            minWidth = -minWidth;
            leftJustify = true;
        }

        if (!isFinite(minWidth))
            throw new Error('sprintf: (minimum-)width must be finite');

        if (!precision)
            precision = 'fFeE'.indexOf(type) > -1 ? 6 : (type == 'd') ? 0 : void(0);
        else if (precision == '*')
            precision = +a[i++];
        else if (precision.charAt(0) == '*')
            precision = +a[precision.slice(1, -1)];
        else
            precision = +precision;

        // grab value using valueIndex if required?
        var value = valueIndex ? a[valueIndex.slice(0, -1)] : a[i++];

        var number;
        var prefix;

        switch (type)
        {
        case 's': return formatString(String(value), leftJustify, minWidth, precision, zeroPad);
        case 'c': return formatString(String.fromCharCode(+value), leftJustify, minWidth, precision, zeroPad);
        case 'b': return formatBaseX(value, 2, prefixBaseX, leftJustify, minWidth, precision, zeroPad);
        case 'o': return formatBaseX(value, 8, prefixBaseX, leftJustify, minWidth, precision, zeroPad);
        case 'x': return formatBaseX(value, 16, prefixBaseX, leftJustify, minWidth, precision, zeroPad);
        case 'X': return formatBaseX(value, 16, prefixBaseX, leftJustify, minWidth, precision, zeroPad).toUpperCase();
        case 'u': return formatBaseX(value, 10, prefixBaseX, leftJustify, minWidth, precision, zeroPad);
        case 'i':
        case 'd':
            // truncate toward zero numerically.  going through parseInt()
            // would parse the *string* form of the number, which is wrong
            // for values printed in exponential notation (1e-7 -> "1").
            number = +value;
            number = (number < 0) ? Math.ceil(number) : Math.floor(number);
            prefix = number < 0 ? '-' : positivePrefix;
            value = prefix + pad(String(Math.abs(number)), precision, '0', false);
            return justify(value, prefix, leftJustify, minWidth, zeroPad);
        case 'e':
        case 'E':
        case 'f':
        case 'F':
        case 'g':
        case 'G':
            number = +value;
            prefix = number < 0 ? '-' : positivePrefix;
            var method = ['toExponential', 'toFixed', 'toPrecision']['efg'.indexOf(type.toLowerCase())];
            // odd positions in 'eEfFgG' are the upper-case variants
            var textTransform = ['toString', 'toUpperCase']['eEfFgG'.indexOf(type) % 2];
            value = prefix + Math.abs(number)[method](precision);
            return justify(value, prefix, leftJustify, minWidth, zeroPad)[textTransform]();
        default: return substring;
        }
    });
}
// note: 'F' is listed so that the %F conversion handled above is reachable
sprintf.regex = /%%|%(\d+\$)?([-+#0 ]*)(\*\d+\$|\*|\d+)?(\.(\*\d+\$|\*|\d+))?([scboxXuidfFeEgG])/g;

// does what 'prop in obj' should have done:
// returns true only if obj defines the property itself
// (rather than inheriting it through its prototype chain).
//
function real_in(prop, obj)
{
    // see http://yuiblog.com/blog/2006/09/26/for-in-intrigue/
    if (!(prop in obj))
        return false;

    return obj.hasOwnProperty(prop);
}

//
// creates regexp that matches specified literal string
// (escaping all possible regexp metacharacters like . and *)
//
// every character of s, including newlines, is rewritten as a
// \uXXXX escape, so nothing in s can act as a metacharacter.
//
function string_to_re(s)
{
    // [\s\S] rather than . so that line terminators are escaped too,
    // and the g flag so that *every* character is escaped, not just
    // the first one.
    s = s.replace(/[\s\S]/g, function(c)
                  {
                      var hex = c.charCodeAt(0).toString(16);
                      return "\\u" + ("0000" + hex).slice(-4);
                  });
    return new RegExp(s);
}

// does what String.split() should have done, but which IE
// fails to do correctly.  IE's split drops the leading element
// when the string begins with the separator, and drops the
// empty element between two adjacent separators.
//
// separator may be a string (matched literally) or a RegExp.
// returns the array of pieces.
//
function real_split(s, separator)
{
    if (!msie)
        return s.split(separator); // real browser

    // IE: walk the matches by hand, keeping empty pieces

    var pattern = (separator instanceof RegExp) ?
        separator : string_to_re(separator);
    // rebuild with only the 'g' flag so exec() iterates over all matches
    var scanner = new RegExp(pattern.source, 'g');
    scanner.lastIndex = 0;

    var pieces = [];
    var start = 0;
    for (var hit = scanner.exec(s); null != hit; hit = scanner.exec(s))
    {
        pieces.push(s.substr(start, hit.index - start));
        start = hit.index + hit[0].length;
    }
    // whatever follows the last separator (possibly the empty string)
    pieces.push(s.substr(start));

    return pieces;
}

// testing
//alert(Dumper(real_split("*pork*", '*')));
//alert(Dumper(real_split("pork", '*')));
//alert(Dumper(real_split("*pork**", '*')));
//alert(Dumper(real_split("*pork***", '*')));
//alert(Dumper(real_split("*pork*pie*", '*')));
//alert(Dumper(real_split("*pork**pie*", '*')));
//
//alert(Dumper(real_split(".pork.", /\./)));
//alert(Dumper(real_split("pork", /\./)));
//alert(Dumper(real_split(".pork..", /\./)));
//alert(Dumper(real_split(".pork...", /\./)));
//alert(Dumper(real_split(".pork.pie.", /\./)));
//alert(Dumper(real_split(".pork..pie.", /\./)));

//
// returns s with leading and trailing whitespace removed.
// s must not be undefined.
//
function trim_lead_trail_space(s)
{
    assert(undefined != s);
    return s.replace(/^\s+/, '').replace(/\s+$/, '');
}

//
// returns a new object holding the own properties of obj1
// overlaid with the own properties of obj2 (obj2 wins on conflict).
// neither argument is modified.  the copy is shallow.
//
function merge_objs(obj1, obj2)
{
    var merged = {};
    var sources = [obj1, obj2];
    for (var k = 0; k < sources.length; k++)
    {
        var src = sources[k];
        for (var name in src)
        {
            // see http://yuiblog.com/blog/2006/09/26/for-in-intrigue/
            if (src.hasOwnProperty(name))
                merged[name] = src[name];
        }
    }
    return merged;
}

//
// parse pvstrings of the form prop=value,prop=value
//
// a flag written without '=' (e.g. 'prop') gets default_value.
// a flag written as 'prop=' gets the empty string.
//
// values are stored as trimmed strings, so a default_value of 1
// is stored as '1'.  the one exception is an undefined default_value,
// which is stored as undefined so that callers can distinguish
// 'test for existence' from 'test for value'.
//
// if obj is passed, it is modified.  returns the new/modified object.
//
// throws (via check_user) on 'a=b=c', on an empty flag name, and on
// names or values containing characters other than \w, '^' and '|'.
//
function pvstring_to_obj(str, default_value, obj)
{
    assert(undefined != str);
    if (undefined == obj) obj = {};
    if ('' == str) return obj;
    var a = real_split(str, ',');
    for(var i=0; i < a.length; i++)
    {
        var pv = real_split(a[i], '=');
        var p;
        var v;
        if (0 == pv.length)
            continue;
        else if (1 == pv.length)
        {
            p = pv[0];
            v = default_value;
        }
        else if (2 == pv.length)
        {
            p = pv[0];
            v = pv[1];
        }
        else
        {
            check_user(0,
                       "confused by notation: 'a=b=c'");
        }
        p = trim_lead_trail_space("" + p);
        check_user(p.length > 0, "missing flag name before '='");
        check_user(p.search(/[^\w\^\|]/) < 0,
                   "flag name [" + p + "] contains illegal character");
        if (undefined !== v)
        {
            // stringifying undefined would store the text 'undefined',
            // which no longer reads as 'no value given'
            v = trim_lead_trail_space("" + v);
            check_user(v.search(/[^\w\^\|]/) < 0,
                       "flag value [" + v + "] contains illegal character");
        }
        obj[p] = v;
    }
    return obj;
}

//
// serialize obj as a pvstring of the form prop=value,prop=value
//
// only own properties are written, sorted by name so that equal
// objects always produce the same string.
// undefined (or null) values are written as '': prop1=,prop2=
//
function obj_to_pvstring(obj)
{
    assert(undefined != obj);

    var names = [];
    for (var name in obj)
    {
        // see http://yuiblog.com/blog/2006/09/26/for-in-intrigue/
        if (obj.hasOwnProperty(name))
            names.push(name);
    }
    names.sort();

    var pairs = [];
    for (var k = 0; k < names.length; k++)
    {
        var key = names[k];
        var val = obj[key];
        if (undefined == val) val = '';
        // names and values may only use \w, '^' and '|'
        assert((""+ key).search(/[^\w\^\|]/) < 0);
        assert((""+ val).search(/[^\w\^\|]/) < 0);
        pairs.push(key + '=' + val);
    }
    return pairs.join(',');
}

//
// parse a getspec: one or two pvstrings separated by '/'
//
//   to_get  -- from the first pvstring, parsed with default value undefined
//   to_set  -- from the second pvstring, parsed with default value 1
//
// the first pvstring is mandatory and must be nonempty.
// the '/' and the second pvstring are optional; to_set is present
// on the result only when the second pvstring is nonempty.
//
// returns an object with the properties listed above.
//
function parse_getspec(str)
{
    assert(undefined != str);

    var halves = real_split(str, '/');
    var get_part = halves[0];
    var set_part = halves[1];

    check_user(undefined != get_part && get_part.length > 0,
               "must list some flags before '/'");

    var spec = { to_get: pvstring_to_obj(get_part, undefined) };
    if (undefined != set_part && set_part.length > 0)
        spec.to_set = pvstring_to_obj(set_part, 1);

    return spec;
}

//
// returns a random integer n with 0 <= n < max
//
function irandom(max)
{
    var scaled = Math.random() * max;
    return Math.floor(scaled);
}

//
// returns true with probability good/total, false otherwise
//
function chance(good, total)
{
    var roll = Math.random() * total;
    return roll < good;
}

//
// returns one randomly chosen element.
//
// call either with a single array-like argument to choose from,
// or with two or more arguments, which are themselves the choices.
//
function pick_one()
{
    var choices = (1 == arguments.length) ? arguments[0] : arguments;
    assert(choices.length > 0);
    return choices[irandom(choices.length)];
}

//
// removes one randomly chosen element from elts and returns it.
// the vacated slot is filled with the last element, so the order
// of the remaining elements is not preserved.
//
function pick_one_and_remove(elts)
{
    assert(elts.length > 0);
    var last = elts.length - 1;
    var where = irandom(elts.length);
    var chosen = elts[where];
    elts[where] = elts[last]; // move last element into the hole ...
    elts.pop();               // ... and drop the now-duplicate tail
    return chosen;
}

//
// randomly pick one elt from elts_available, remove it, and return it.
// if elts_available is empty, first refresh elts_available
// by copying the list from elts.
//
// the effect is that every element of elts is returned once
// (in random order) before any element is repeated.
//
function pick_one_cyclical(elts_available, elts)
{
    assert(elts.length > 0);
    assert(undefined != elts_available);
    if (elts_available.length == 0)
    {
        // 'var' keeps the loop index from leaking into the global scope
        for(var i=0; i < elts.length; i++)
            elts_available.push(elts[i]);
    }
    var elt = pick_one_and_remove(elts_available);
    return elt;
}

//
// dict -- a bag of entries (plain objects) that can be queried by
// property.  use with 'new dict()'.
//
// a query is an object whose properties are all tests that an
// entry must pass:
//   { prop: undefined }  entry must define prop itself
//   { prop: 'v' }        entry[prop] must equal 'v'
//   { 'p1|p2': ... }     test passes if it passes for p1 or for p2
//   { prop: 'v1|v2' }    entry[prop] must equal v1 or v2
//   { '^prop': ... }     leading '^' inverts the test
//
function dict()
{
    var self = this;

    var entries = [];
    // query string -> { elts, elts_available }, used by select1()
    var cache = {};

    // forget all entries and all cached query results
    self.clear = function()
    {
        entries = [];
        cache = {};
    };

    // append an entry.
    // NOTE: results already cached by select1() are not refreshed,
    // so add entries before issuing queries (or call clear()).
    self.add = function(elt)
    {
        entries.push(elt);
    };

    // returns true if ent passes the single test (qprop, qvalue).
    // qprop must already have any leading '^' removed.
    function check_match(qprop, qvalue, ent)
    {
        var eprops = real_split(qprop+'', '|');
        for(var ep=0; ep < eprops.length; ep++)
        {
            var eprop = eprops[ep];
            
            if (undefined == qvalue) // user is testing for existence
            {
                if (real_in(eprop, ent))
                    return true;
            }
            else // user is testing for value
            {
                var evalues = real_split(qvalue+'', '|');
                for(var ev=0; ev < evalues.length; ev++)
                {
                    var evalue = evalues[ev];

                    // loose comparison on purpose: entry values may be
                    // strings while query values may be numbers
                    if (ent[eprop] == evalue)
                        return true;
                }
            }
        }
        return false;
    }

    // returns an array of all entries that pass every test in query
    // (in insertion order).  an empty query matches every entry.
    self.select = function(query)
    {
        assert('object' == typeof query);

        var matches = [];

        for(var i=0; i < entries.length; i++)
        {
            var ent = entries[i];

            var match = 1;
            for (var qprop in query) 
            {
                // see http://yuiblog.com/blog/2006/09/26/for-in-intrigue/
                if (!query.hasOwnProperty(qprop))
                    continue;
                var qvalue = query[qprop];
                var invert = (qprop.search(/^\^/) >= 0);
                if (invert) qprop = qprop.substr(1);
                match = check_match(qprop, qvalue, ent);
                if (invert) match = !match;
                if (!match) break;
            }
            if (!match) continue;

            matches.push(ent);
        }
        
        return matches;
    };

    // select a random element which satisfies the query, and also remove
    // that element from further consideration for the query until
    // all other elements have been exhausted.
    //
    // throws (via check_user) if no entry satisfies the query.
    //
    self.select1 = function(query)
    {
        assert('object' == typeof query);

        var key = obj_to_pvstring(query);

        var blob = cache[key];

        if (undefined == blob)
        {
            var elts = self.select(query);
            // validate before caching, so that a failed query
            // fails the same way every time it is repeated
            check_user(elts.length > 0,
                       "there are no entries at all to satisfy your request " +
                       "for '" + key + "'");
            cache[key] = blob =
            {
                elts: elts,
                elts_available: []
            };
        }

        return pick_one_cyclical(blob.elts_available, blob.elts);
    };
}
        
//
// xspeak -- random text generator driven by a configuration string.
// use with 'new xspeak()'.
//
// public members:
//   config(bs_string)  parse the configuration and build the dictionaries
//   get(getspec)       produce a random structured element
//   to_string(elt)     flatten the result of get() into text
//   gets(getspec)      get() followed by to_string()
//   code               text of the 'code' section of the configuration
//   code_line_number   1-based line on which that text starts
//
function xspeak()
{
    var self = this;

    // private stuff

    var last_bs_string = undefined;
    var dicts = {}; // one dict per cat
    
    self.code = '';
    self.code_line_number = 1;

    // call this with the complete xspeak configuration string
    //
    // after setup, you'll be able to make sentences, noun phrases, etc.
    //
    // the configuration is a series of sections, each introduced by a
    // 'BEGIN:<cat>' line.  '#' starts a comment and blank lines are
    // skipped, except in the 'code' section, which is copied verbatim
    // into self.code and runs to the end of the string.
    //
    // on a bad line, throws a plain object { message, lineNumber }
    // (not an Error) describing the offending line.
    //
    self.config = function(bs_string)
    {
        if (bs_string == last_bs_string)
        {
            // cheesy caching
            return;
        }

        last_bs_string = ''; // in case there's an error

        dicts = {};
        self.code = '';
        self.code_line_number = 1;

        // remove stupid DOS CRs, for IE
        var bs_string_LF = bs_string.replace(/\r/g, '');
        var bs_lines = real_split(bs_string_LF, '\n');

        var word_id = 0;
        var cat;
        var i;
        var a; // scratch for regexp match results
        for(i=0; i < bs_lines.length; i++)
        {
            var line = bs_lines[i];

            if ('code' == cat)
            {
                self.code += line + '\n';
                continue;
            }

            // strip comments
            line = line.replace(/#.*/, '');
            
            // skip blank lines
            if (line.search(/\S/) < 0) continue;
            
            if ((a = line.match(/BEGIN:(.*)/)) != null)
            {
                // trim, so that 'BEGIN: noun' and 'BEGIN:noun   # note'
                // both select the category 'noun'
                cat = trim_lead_trail_space(a[1]);

                if ('code' == cat)
                {
                    // code starts on next line
                    self.code_line_number = i+1+1; // line numbers are 1-based
                    continue;
                }

                if (undefined == dicts[cat])
                {
                    dicts[cat] = new dict();
                }

                continue;
            }

            try 
            {
                check_user(undefined != cat,
                           "missing BEGIN: did you accidentally delete it?");
                assert(undefined != dicts[cat]);

                var elt = 
                    { 
                        cat: cat,
                        word_id: word_id++
                    };
                
                if ('sentence' == cat)
                {
                    // a sentence is a template: raw text alternating
                    // with [getspec]s.  after splitting on brackets,
                    // even-numbered parts are text and odd-numbered
                    // parts are getspecs.
                    elt.value = [];
                    var parts = real_split(line, /[\[\]]/);
                    for(var j=0; j < parts.length; j++)
                    {
                        // suck in raw text
                        parts[j] = trim_lead_trail_space(parts[j]);
                        if (parts[j].length > 0)
                            elt.value.push({ to_get: { value: parts[j] }});

                        j++;
                        if (j >= parts.length) break;

                        // suck in a [getspec]
                        parts[j] = trim_lead_trail_space(parts[j]);
                        elt.value.push(parse_getspec(parts[j]));
                    }
                    elt.value.push({ to_get: { value: ' ' }}); // 1 extra space
                    elt.capitalize = true; // sentences always need capitalize
                    dicts[cat].add(elt);
                }
                else // vocab
                {
                    // vocab lines are ':'-separated fields.
                    // empty fields become undefined.
                    var parts = real_split(line, ':');
                    for(var j=0; j < parts.length; j++)
                    {
                        parts[j] = trim_lead_trail_space(parts[j]);
                        if (0 == parts[j].length)
                            parts[j] = undefined;
                    }
                    if ('adj' == cat)
                    {
                        // flags : adjective : flags
                        if (undefined != parts[0])
                            pvstring_to_obj(parts[0], 1, elt);
                        check_user(!(elt.a && elt.an),
                                   "cannot have both a and an");
                        elt.value = parts[1];
                        if (undefined != parts[2])
                            pvstring_to_obj(parts[2], 1, elt);
                        dicts[cat].add(elt);
                    }
                    else if ('noun' == cat)
                    {
                        // flags : singular : plural : flags
                        // one entry is added per form that is present
                        if (undefined != parts[0])
                            pvstring_to_obj(parts[0], 1, elt);
                        var singular = parts[1];
                        var plural = parts[2];
                        if (undefined != parts[3])
                            pvstring_to_obj(parts[3], 1, elt);

                        check_user(!(elt.a && elt.an),
                                   "cannot have both a and an");
                        check_user(elt.a || elt.an || elt.the || elt.just,
                                   "must have at least one of a,an,the,just");

                        if (undefined != singular)
                        {
                            dicts[cat].add(
                                merge_objs(elt,
                                           {
                                               num: 'singular',
                                               value: singular
                                           }));
                        }
                        if (undefined != plural) // plural
                        {
                            // 's' is shorthand for singular + 's'
                            if ('s' == plural)
                                plural = singular + 's';
                            dicts[cat].add(
                                merge_objs(elt,
                                           {
                                               num: 'plural',
                                               value: plural
                                           }));
                        }
                    }
                    else if ('verb' == cat)
                    {
                        // to form : you form : he form : flags
                        // one entry is added per inflection
                        var to = parts[0];
                        var you = parts[1];
                        var he = parts[2];
                        if (undefined != parts[3])
                            pvstring_to_obj(parts[3], 1, elt);
                        check_user(elt.intrans || elt.trans || elt.helping,
                                   "verb must have at least one of " +
                                   "intrans,trans,helping");
                        // 'to' is undefined when the field is empty, so
                        // it must be tested before its length is read
                        check_user((!elt.intrans && !elt.trans) ||
                                   (undefined != to && to.length > 0),
                                   "you must specify a 'to' (infinitive) " +
                                   "form for verb since it can be " +
                                   "a helping verb");
                        
                        if (undefined != to)
                        {
                            check_user(to.search(/^to\s+/) >= 0,
                                       "you must write 'to " + to + "', " +
                                       "not just '" + to + "'.  " +
                                       "sorry for being pedantic.");
                            to = to.replace(/^to\s+/, '');
                            dicts[cat].add(
                                merge_objs(elt,
                                           {
                                               infl: 'to',
                                               value: to
                                           }));
                        }
                        if (undefined == you)
                        {
                            // the 'you' form defaults to the infinitive
                            check_user(undefined != to,
                                       "if you want to omit the 'you' form, " +
                                       "you must include a 'to' form");
                            you = to;
                        }
                        else
                        {
                            check_user(you.search(/^you\s+/) >= 0,
                                       "you must write 'you " + you + "', " +
                                       "not just '" + you + "'.  " +
                                       "sorry for being pedantic.");
                            you = you.replace(/^you\s+/, '');
                        }
                        if (undefined != you)
                        {
                            dicts[cat].add(
                                merge_objs(elt,
                                           {
                                               infl: 'you',
                                               value: you
                                           }));
                        }
                        check_user(undefined != he,
                                   "you must specify a 'he' form");
                        if ('s' == he)
                        {
                            check_user(undefined != you,
                                       "if you want to write 's' " +
                                       "for the 'he' form, " +
                                       "you must include a " +
                                       "'to' or 'you' form");
                            // append 's' to the first word of the 'you' form
                            a = 
                                you.match(/^(\S+)(\s.*)$/) ||
                                you.match(/^(.*)$/);
                            he = a[1] + 's' +
                                ((undefined != a[2]) ? a[2] : '');
                        }
                        else
                        {
                            check_user(he.search(/^he\s+/) >= 0,
                                       "you must write 'he " + he + "', " +
                                       "not just '" + he + "'.  " +
                                       "sorry for being pedantic.");
                            he = he.replace(/^he\s+/, '');
                        }
                        dicts[cat].add(
                            merge_objs(elt,
                                       {
                                           infl: 'he',
                                           value: he
                                       }));
                    }
                    else
                    {
                        // any other category -- value : flags
                        elt.value = parts[0];
                        if (undefined != parts[1])
                            pvstring_to_obj(parts[1], 1, elt);
                        dicts[cat].add(elt);
                    }
                }
            }
            catch(e)
            {
                // we don't use JS Error object because IE Error
                // doesn't have lineNumber -- LAME!
                var e2 =
                {
                    message:
                        "line: " + (i+1) + "\n" +
                        "category: " + cat + "\n" +
                        line + "\n" + 
                        e.message,
                    lineNumber: i+1
                };
                throw e2;
            }
        }

        last_bs_string = bs_string;
    };

    //
    // get a random element whose properties match the to_get object
    // you pass in, and set properties on the returned object as
    // specified by the optional to_set argument.
    //
    // call either with:
    // - a 'getspec' string, which encodes to_get and to_set, or
    // - a JavaScript object with { to_get: {...}, to_set: {...} }
    //
    // the categories 'nphrase', 'clause' and 'paragraph' are built
    // here from other categories; everything else is looked up in
    // the dictionaries built by config().
    //
    // the getspec you pass in is not modified.
    // throws (via check_user) if the category is unknown or
    // if nothing matches.
    //
    self.get = function(getspec)
    {
        assert(undefined != getspec);
        if ('object' != typeof getspec)
            getspec = parse_getspec(getspec);
        assert(undefined != getspec.to_get);
        assert('object' == typeof getspec.to_get);
        
        var elt;

        if (undefined != getspec.to_get.value)
        {
            // they are passing in some kind of static text 
            // that already has a value.  no need to search.
            elt = getspec.to_get;
        }
        else if ('nphrase' == getspec.to_get.cat) 
        {
            elt = { cat: 'nphrase' };

            // getspec.to_get filters the noun chosen
            var n = 
                self.get({ to_get: merge_objs(getspec.to_get, {cat: 'noun'})});
                         
            elt = merge_objs(n, elt); // inherit other properties of noun

            // afterart is the word that would directly follow an
            // article; it decides between 'a' and 'an'
            var afterart;
            var adj;
            
            if (chance(1,2))
                afterart = n; // no adj
            else
                afterart = adj = self.get({to_get: {cat: 'adj'}});
            
            var doart;
            if (!n.a && !n.an && !n.the)
            {
                // no article allowed with n.  adj probably also unwise.
                doart = false;
                adj = undefined;
            }
            else if (!n.just && n.num!='plural') // plural can always be 'just'
            {
                // article is mandatory
                doart = true;
            }
            else
            {
                // could go either way: favor no article
                doart = chance(1,4);
            }
            
            elt.value = [];

            if (doart)
            {
                if (n.num=='plural' ||       // plural forces 'the'
                    (!n.a && !n.an) ||     // noun doesn't allow 'a'/'an'
                    (undefined != adj &&
                     !adj.a && !adj.an) || // adj doesn't allow 'a'/'an'
                    chance(1,5))           // leave it to chance.  favor a/an
                {
                    // the
                    elt.value.push({ value: 'the' });
                }
                else
                {
                    // an
                    elt.value.push({ value: (afterart.an ? 'an' : 'a') });
                }
            }
            
            if (adj)
                elt.value.push(adj);
            
            elt.value.push(n);
        }
        else if ('clause' == getspec.to_get.cat)
        {
            // subject [interj] [helping verb] [adv] verb [interj] [object]
            elt = { cat: 'clause' };
            
            elt.value = [];
            
            var subj = self.get({to_get: {cat: 'nphrase'}});
            
            elt.value.push(subj);

            if (chance(1,70)) 
                elt.value.push(self.get({to_get: {cat: 'interj'}}));

            // verb must agree with the number of the subject
            var infl = ('singular'==subj.num) ? 'he' : 'you';
        
            if (chance(1,10))
            {
                // push helping verb
                elt.value.push(
                    self.get({to_get: {cat: 'verb', infl: infl, helping: 1}}));
                
                // then will need infinitive following helping verb
                infl = 'to';
            }
            
            if (chance(1,10))
                elt.value.push(self.get({to_get: {cat: 'adv'}}));

            // get a transitive or intransitive verb
            var v = 
                self.get({to_get: {cat: 'verb', 
                                   infl: infl, 
                                   'trans|intrans': undefined}});

            elt.value.push(v);
            
            if (chance(1,50)) 
                elt.value.push(self.get({to_get: {cat: 'interj'}}));
            
            // transitive verbs take an object
            if (v.trans)
                elt.value.push(self.get({to_get: {cat: 'nphrase'}}));
        }
        else if ('paragraph' == getspec.to_get.cat)
        {
            // one sentence, then 9-in-10 odds of another, and so on
            elt = { cat: 'paragraph' };
            elt.value = [];
            do 
            {
                elt.value.push(self.get({to_get: {cat: 'sentence'}}));
            } while (chance(9,10));
        }
        else // anything else comes from dicts, including 'sentence'
        {
            // report an unknown category as a user error rather than
            // crashing on a missing dictionary
            check_user(undefined != dicts[getspec.to_get.cat],
                       "unknown category '" + getspec.to_get.cat + "'");
            elt = dicts[getspec.to_get.cat].select1(getspec.to_get);
        }
        
        assert(undefined != elt);

        var to_set = getspec.to_set;

        // post-process sentence template into actual values
        //
        if ('sentence' == elt.cat)
        {
            // we'll need to replace each template entry in elt.value.
            // work on a copy, so that the caller's to_set (which may be
            // a template stored in a dictionary) is left untouched.
            to_set = merge_objs(to_set, { value: [] });
            for(var i=0; i < elt.value.length; i++)
            {
                // each template entry is a getspec!
                to_set.value.push(self.get(elt.value[i]));
            }
        }

        // make changes to elt
        //
        if (undefined != to_set)
        {
            // must copy first, since elt could be from database.
            elt = merge_objs(elt, to_set); // copy and make changes
        }

        return elt;
    };

    // recursive worker for self.to_string().  appends the text of elt
    // to state.s.  state also carries:
    //   needcap  next word character must be capitalized
    //   inquote  currently inside a double-quoted span
    //   lastbit  last character emitted so far ('' at the start)
    function to_string(elt, state)
    {
        if ('object' == typeof elt.value)
        {
            // composite element: value is an array of child elements
            if (elt.capitalize)
                state.needcap = true; // need to capitalize first alpha char

            for(var i=0; i < elt.value.length; i++)
                to_string(elt.value[i], state);

            if (elt.capitalize)
                state.needcap = false; // just in case object has no content
        }
        else
        {
            var v = elt.value;

            // no proper case mapping support in JavaScript, sigh.
            // real support would need to work on whole words.
            if (state.needcap && v.search(/\w/) >= 0)
            {
                v = v.replace(/\w/, function(s) { return s.toUpperCase(); });
                state.needcap = false;
            }

            // undirect directed double quotes.  a character class is
            // needed here: each curly quote, opening or closing,
            // becomes a straight quote
            v = v.replace(/[“”]/g, '"'); 

            // redirect double quotes, alternating open/close
            while (v.search(/\"/) >= 0)
            {
                v = v.replace(/\"/, 
                              (state.inquote ? '”' : '“'));
                state.inquote = !state.inquote;
            }

            // add space before v if appropriate
            if (// not if we're at start of string
                state.lastbit.length > 0 &&
                // not if we're just beginning quote or paren or emdash
                state.lastbit.search(/[\(“—]$/) < 0 &&
                // not if we're just ending quote/paren/emdash or have punctuation
                v.search(/^[\,\;\:\.\?\!”\)—]/) < 0)
            {
                state.s += ' ';
            }
            
            state.s += v;

            // charAt rather than substr(-1): old versions of IE do not
            // support a negative start index for substr
            if (v.length > 0)
                state.lastbit = v.charAt(v.length - 1);
        }
    }
    
    //
    // convert structured output of get() into a string.
    //
    self.to_string = function(elt)
    {
        var state = { s: '', inquote: false, lastbit: '', needcap: false };
        to_string(elt, state);
        return state.s;
    };

    //
    // like get(), but returns string.
    //
    self.gets = function(getspec)
    {
        return self.to_string(self.get(getspec));
    };
}

// shorthand for document.getElementById(): obj is the element id,
// and the result is whatever getElementById returns for it.
function ei(obj)
{
    var node = document.getElementById(obj);
    return node;
}

//
// given an arbitrary text string that is to be used in an HTML
// file (either by itself or as a value to some tag attribute),
// convert every character that might possibly be interpreted
// as HTML (including <tags> and "values" and &entities) into
// a numeric HTML character reference.  very conservative:
// everything except ASCII letters, digits, underscore, space
// and newline is escaped.
//
// s:       value to escape.  non-strings are converted with s+"".
//          must not be null or undefined (asserted).
// returns: the escaped string.
//
function html_escape(s)
{
    assert(undefined != s);

    // the g flag is essential: without it only the FIRST unsafe
    // character is replaced and the rest of the string goes out raw.
    //
    // a surrogate pair is matched as one unit (first alternative)
    // so that a character outside the BMP becomes a single
    // reference to its real code point instead of two references
    // to its surrogate halves.
    return (s+"").replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[^a-zA-Z0-9_ \n]/g,
                          function(c)
                          {
                              var code = c.charCodeAt(0);
                              if (2 == c.length)
                              {
                                  code = (code - 0xD800) * 0x400 +
                                         (c.charCodeAt(1) - 0xDC00) +
                                         0x10000;
                              }
                              return '&#' + code + ';';
                          });
}

// makes simple HTML element (old-school string, not DOM node):
// just the opening <tag ...> with the specified attributes.
//
// tag:     tag name (string).
// attrs:   JavaScript object mapping attribute name to value.
//          only the object's own properties are used.  a value
//          of undefined or null produces a bare attribute
//          (e.g. ' disabled'); any other value is html_escape()d
//          and emitted as name="value".
// returns: the tag as a string.
//
function selt(tag, attrs)
{
    assert('string' == typeof tag);
    assert('object' == typeof attrs);
    assert(2 == arguments.length);

    var html = '<' + tag;
    // attr must be declared: an undeclared loop variable leaks a
    // global and is a ReferenceError in strict mode.
    for(var attr in attrs)
    {
        // see http://yuiblog.com/blog/2006/09/26/for-in-intrigue/
        if (!attrs.hasOwnProperty(attr))
            continue;
        if (undefined != attrs[attr])
            html += ' ' + attr + '="' + html_escape(attrs[attr]) + '"';
        else
            html += ' ' + attr;
    }
    html += '>';
    return html;
}

// builds a full HTML element as a string: the opening tag from
// selt(tag, attrs), then the contents, then the closing </tag>.
//
// tag:             tag name (string).
// attrs:           attribute object, handed to selt() as-is.
// escape_contents: when true, contents goes through html_escape()
//                  first; when false it is inserted verbatim.
// contents:        text or markup to place between the tags (string).
//
// exactly four arguments are required.
//
function celt(tag, attrs, escape_contents, contents)
{
    assert('string' == typeof tag);
    assert('object' == typeof attrs);
    assert(undefined != escape_contents);
    assert('string' == typeof contents);
    assert(4 == arguments.length);

    var inner = escape_contents ? html_escape(contents) : contents;
    var opening = selt(tag, attrs);
    var closing = '</' + tag + '>';

    return opening + inner + closing;
}

//
// constructor (call with new) for an object that hands out items
// from a fixed list of choices.
//
// type selects how the list is built:
//   'choices'  -- every argument after type is one choice:
//                   new picker('choices', 'a', 'b', 'c')
//   'numbered' -- arguments[1] is a sprintf format containing a %
//                 conversion and arguments[2] is a count; the
//                 choices are sprintf(fmt, 0) .. sprintf(fmt, count-1).
// any other type is an assertion failure.
//
// members:
//   choices           -- the full list (a real Array).
//   choices_available -- working list passed to pick_one_cyclical();
//                        starts empty and is emptied by reset().
//   pick_one()        -- returns pick_one_cyclical(choices_available,
//                        choices).
//   reset()           -- empties choices_available.
//
function picker(type)
{
    var self = this;

    if ('choices' == type)
    {
        // arguments is array-like but NOT an Array (it has no
        // shift()), so copy everything after type into a real array.
        self.choices = Array.prototype.slice.call(arguments, 1);
    }
    else if ('numbered' == type)
    {
        var fmt = arguments[1];
        assert(fmt.search(/%/) >= 0);
        var count = arguments[2];
        assert(undefined != count);
        self.choices = [];
        for(var i=0; i < count; i++)
            self.choices.push(sprintf(fmt, i));
    }
    else
        assert(0);

    self.choices_available = [];

    self.pick_one = function()
    {
        return pick_one_cyclical(self.choices_available,
                                 self.choices);
    };

    self.reset = function()
    {
        self.choices_available = [];
    };
}

//
// selects the characters from start to end (0-based) in an input
// or textarea; when start==end this just places the caret there.
// for IE, a CRLF counts as one "character" even though it has
// length 2 and regexps match \r\n
//
// obj:   the input or textarea element.
// start: index of the first selected character.
// end:   index just past the last selected character.
//
// does nothing if obj offers neither setSelectionRange nor
// createTextRange.
//
function set_selection_range(obj, start, end)
{
    // FireFox
    if (obj.setSelectionRange)
    {
        obj.focus();
        obj.setSelectionRange(start, end);
        obj.focus(); // works but cursor often does not blink in FF -- why?
        return;
    }

    if (!obj.createTextRange)
        return;

    // IE: collapse a text range to the start of the control, then
    // stretch its end and finally its start out to the requested
    // offsets.
    var text_range = obj.createTextRange();
    text_range.collapse(true);
    text_range.moveEnd('character', end);
    text_range.moveStart('character', start);
    text_range.select();
}

//
// moves caret to specified 1-based line of textarea
//
// if hilight is true, also highlights that line (from its first
// character up to, but not including, the newline that ends it).
//
// obj:     the textarea; its .value is scanned for newlines.
// n:       1-based line number.  if the text has fewer than n
//          lines, the last line is used instead.
// hilight: true to select the whole line, false to just place
//          the caret at the start of the line.
//
// the result is applied with set_selection_range(obj, start, end).
//
function go_to_line(obj, n, hilight)
{
    n--; // convert to 0-based
    var v = obj.value;
    if (msie)
    {
        // the IE textarea has CRLFs, and regexps
        // match the CRLFs with a match length of 2,
        // but the IE args to set_selection_range()
        // will be as if they were LFs.  ARGH!!
        v = v.replace(/\r/g, '');
    }

    // step over n newlines; start ends up just past the last one
    // found, i.e. at the first character of the requested line.
    var re = /\n/g;
    var start = 0;
    var found = true;
    for(var i=0; i < n; i++)
    {
        if (null == re.exec(v))
        {
            // ran out of lines: stay on the last line we reached.
            found = false;
            break;
        }
        start = re.lastIndex;
    }

    // caret only: empty selection at the start of the line.
    var end = start;
    if (hilight)
    {
        // the line ends at the next newline (excluded from the
        // selection) or, for the last line, at the end of the text.
        // a failed exec() resets lastIndex, so do not search again
        // once the loop above has run off the end.
        var next = found ? re.exec(v) : null;
        end = (null != next) ? next.index : v.length;
    }

    set_selection_range(obj, start, end);
}

//
// re-reads the text of the 'bs' element into the generator,
// evaluates the generator's JavaScript code to produce a page of
// HTML, and writes that page into a popup window.
//
// x:         generator object.  this function uses x.config(text),
//            x.code (JavaScript source whose value is the HTML) and
//            x.code_line_number (line of the 'bs' text where that
//            code starts).
// window_id: window name handed to window.open().
//
// on any failure the user gets an alert() and, where a line is
// known, the offending line of the 'bs' box is highlighted; the
// function then returns without opening or touching the popup.
//
function do_xspeak_popup(x, window_id)
{
    try
    {
        x.config(ei('bs').value); // does nothing if config is unchanged
    }
    catch (e)
    {
        alert("Please fix the following problem with the words " +
              "that you typed in:\n" +
              e.message);
        go_to_line(ei('bs'), e.lineNumber, true);
        return;
    }

    var html;

    // NOTE(review): eval() runs x.code with full page privileges.
    // judging by the messages below this is code from the custom BS
    // box, i.e. the user's own input -- confirm it can never come
    // from anyone else.
    //
    // FireFox gives line numbers for errors inside eval...but
    // there's a catch: it reports the line of the eval() call in
    // THIS file plus the line within the evaluated code.  lnhack
    // records the line just above the eval() call so the offset
    // can be removed.  the two statements inside the try MUST stay
    // on consecutive lines, probe first, with nothing in between.
    var lnhack;
    try
    {
        try { not_defined(); } catch (e2) { lnhack = e2.lineNumber; }
        html = eval(x.code);
    }
    catch (e)
    {
        // horrible hack: see above.  sigh!
        var line_number =
            e.lineNumber - lnhack + x.code_line_number - 1;

        // engines without Error.lineNumber (msie among them) leave
        // us with NaN: say nothing about the line and point at the
        // start of the code instead.
        var line_known = isFinite(line_number);
        if (!line_known)
            line_number = x.code_line_number;

        alert("Please fix the following problem with the " +
              "JavaScript code " +
              "at the end of the custom BS box:\n" +
              (line_known ? "Line: " + line_number + "\n" : "") +
              e.message);
        go_to_line(ei('bs'), line_number, true);
        return;
    }

    var w = window.open('',
                        window_id,
                        'width=750,height=600'
                        +',menubar=1'
                        +',toolbar=0'
                        +',status=1'
                        +',scrollbars=1'
                        +',resizable=1');

    // window.open() returns null when a popup blocker steps in.
    if (!w)
    {
        alert("The popup window could not be opened.  Please allow " +
              "popups for this page and try again.");
        return;
    }

    w.document.write(html);
    w.document.close();
}
