(svn r1929) Feature: [namegen] Support for dynamic generation of the Czech town names.

The static names are still used in 1/4 of cases. I think the tables for
dynamic generation look pretty spectacular. :-) New stems are still
needed and there may be occasional glitches; please let me know if you spot any.

I guess that this method of dynamic generation could be used for at least
Slovak town names, too. And possibly other Slavic languages?
This commit is contained in:
pasky
2005-03-05 21:00:13 +00:00
parent 4325f13f74
commit d0f9cc27a3
2 changed files with 376 additions and 1 deletions

155
namegen.c
View File

@@ -318,7 +318,160 @@ static byte MakePolishTownName(char *buf, uint32 seed)
/**
 * Generate a Czech town name into buf.
 *
 * When SeedChance(0, 4, seed) yields 0, a name is copied verbatim from
 * name_czech_real. Otherwise the name is assembled as
 *   [adjective + " "] substantive [" " + suffix]
 * where the substantive is either a complete word from
 * name_czech_subst_full, or stem + [postfix] + ending taken from the
 * name_czech_subst_* tables. The last character of the adjective is
 * rewritten so that it agrees with the gender of the substantive.
 *
 * @param buf  Destination string; no bounds checking is done here, so the
 *             caller must provide a buffer large enough for the result.
 * @param seed Random seed; every choice is derived from it through
 *             SeedChance() called with a different first argument.
 * @return Always 0.
 */
static byte MakeCzechTownName(char *buf, uint32 seed)
{
	/* Selector for prefixes/suffixes. As implemented below:
	 * 0..11 prefix only, 12..16 suffix only, any other value neither.
	 * A prefix and a suffix are never used together. */
	int prob_tails;
	bool do_prefix, do_suffix, dynamic_subst;
	/* IDs of the respective parts */
	int prefix = 0, stem = 0, postfix = 0, ending = 0, suffix = 0;
	/* The select criteria. */
	enum CzechGender gender;
	enum CzechChoose choose;
	enum CzechAllow allow;

	// 1:3 chance to use a real name.
	if (SeedChance(0, 4, seed) == 0) {
		strcpy(buf, name_czech_real[SeedChance(1, lengthof(name_czech_real), seed)]);
		return 0;
	}

	// NUL terminates the string for strcat()
	strcpy(buf, "");

	prob_tails = SeedChance(2, 32, seed);
	do_prefix = prob_tails < 12;
	do_suffix = prob_tails > 11 && prob_tails < 17;

	if (do_prefix) prefix = SeedChance(5, lengthof(name_czech_adj), seed);
	if (do_suffix) suffix = SeedChance(7, lengthof(name_czech_suffix), seed);

	// The stems are weighted three times as much as the full substantives.
	stem = SeedChance(9, lengthof(name_czech_subst_full)
	                     + 3 * lengthof(name_czech_subst_stem),
	                  seed);

	if (stem < (int) lengthof(name_czech_subst_full)) {
		// That was easy! A complete substantive, used as is.
		dynamic_subst = false;
		gender = name_czech_subst_full[stem].gender;
		choose = name_czech_subst_full[stem].choose;
		allow = name_czech_subst_full[stem].allow;
	} else {
		/* Indices of the endings which are usable with the chosen stem. */
		unsigned int map[lengthof(name_czech_subst_ending)];
		int ending_start = -1, ending_stop = -1;
		int i;

		// Load the substantive stem
		dynamic_subst = true;
		stem -= lengthof(name_czech_subst_full);
		stem %= lengthof(name_czech_subst_stem);
		gender = name_czech_subst_stem[stem].gender;
		choose = name_czech_subst_stem[stem].choose;
		allow = name_czech_subst_stem[stem].allow;

		// Load the postfix (1:1 chance that a postfix will be inserted);
		// a value >= lengthof(name_czech_subst_postfix) means "no postfix".
		postfix = SeedChance(14, lengthof(name_czech_subst_postfix) * 2, seed);

		if (choose & CZC_POSTFIX) {
			// Always get a real postfix.
			postfix %= lengthof(name_czech_subst_postfix);
		}
		if (choose & CZC_NOPOSTFIX) {
			// Always drop a postfix.
			postfix += lengthof(name_czech_subst_postfix);
		}
		// Record the decision so that the endings can be matched against it.
		if (postfix < (int) lengthof(name_czech_subst_postfix)) {
			choose |= CZC_POSTFIX;
		} else {
			choose |= CZC_NOPOSTFIX;
		}

		// Localize the array segment containing a good gender:
		// the first contiguous run of endings acceptable for the stem.
		for (ending = 0; ending < (int) lengthof(name_czech_subst_ending); ending++) {
			const struct CzechNameSubst *e = &name_czech_subst_ending[ending];

			if (gender == CZG_FREE
			    || (gender == CZG_NFREE && e->gender != CZG_SNEUT && e->gender != CZG_PNEUT)
			    || (gender == e->gender)) {
				if (ending_start < 0) {
					ending_start = ending;
				}
			} else if (ending_start >= 0) {
				ending_stop = ending - 1;
				break;
			}
		}
		if (ending_stop < 0) {
			// Whoa. All the endings matched (the run reaches the end of the table).
			ending_stop = ending - 1;
		}

		// Make a sequential map of the items with good mask
		i = 0;
		for (ending = ending_start; ending <= ending_stop; ending++) {
			const struct CzechNameSubst *e = &name_czech_subst_ending[ending];

			if ((e->choose & choose) == choose && (e->allow & allow) != 0) {
				map[i++] = ending;
			}
		}
		assert(i > 0);

		// Load the ending
		ending = map[SeedChance(16, i, seed)];
		// Override possible CZG_*FREE; this must be a real gender,
		// otherwise we get overflow when modifying the adjectivum.
		gender = name_czech_subst_ending[ending].gender;
		assert(gender != CZG_FREE && gender != CZG_NFREE);
	}

	if (do_prefix && (name_czech_adj[prefix].choose & choose) != choose) {
		// Throw away non-matching prefix.
		do_prefix = false;
	}

	// Now finally construct the name
	if (do_prefix) {
		enum CzechPattern pattern = name_czech_adj[prefix].pattern;
		int endpos;

		strcat(buf, name_czech_adj[prefix].name);
		endpos = strlen(buf) - 1;
		if (gender == CZG_SMASC && pattern == CZP_PRIVL) {
			/* -ovX -> -uv */
			buf[endpos - 2] = 'u';
			assert(buf[endpos - 1] == 'v');
			buf[endpos] = '\0';
		} else {
			// Replace the last character by the gender-specific one.
			buf[endpos] = name_czech_patmod[gender][pattern];
		}

		strcat(buf, " ");
	}

	if (dynamic_subst) {
		strcat(buf, name_czech_subst_stem[stem].name);
		if (postfix < (int) lengthof(name_czech_subst_postfix)) {
			int postlen, endlen;

			postlen = strlen(name_czech_subst_postfix[postfix]);
			endlen = strlen(name_czech_subst_ending[ending].name);
			// Kill the "avava" and "Jananna"-like cases: the postfix is
			// skipped when it would repeat the second letter of the ending.
			if (2 > postlen || postlen > endlen
			    || (name_czech_subst_postfix[postfix][1]
			        != name_czech_subst_ending[ending].name[1]
			        && name_czech_subst_postfix[postfix][2]
			           != name_czech_subst_ending[ending].name[1])) {
				strcat(buf, name_czech_subst_postfix[postfix]);
			}
		}
		strcat(buf, name_czech_subst_ending[ending].name);
	} else {
		strcat(buf, name_czech_subst_full[stem].name);
	}

	if (do_suffix) {
		strcat(buf, " ");
		strcat(buf, name_czech_suffix[suffix]);
	}

	return 0;
}

View File

@@ -1659,6 +1659,228 @@ static const char *name_czech_real[] = {
"Znojmo" "Znojmo"
}; };
/* The advanced hyperintelligent Czech town names generator! */
/* Grammatical gender of a substantive: three singular genders followed by
 * the three plural ones. The first six values index name_czech_patmod. */
enum CzechGender {
	CZG_SMASC = 0, // singular masculine
	CZG_SFEM  = 1, // singular feminine
	CZG_SNEUT = 2, // singular neuter
	CZG_PMASC = 3, // plural masculine
	CZG_PFEM  = 4, // plural feminine
	CZG_PNEUT = 5, // plural neuter
	/* Special for substantive stems - the gender is taken from the ending. */
	CZG_FREE  = 6,
	/* Like CZG_FREE, but endings of neuter gender (CZG_SNEUT and CZG_PNEUT)
	 * are not accepted. */
	CZG_NFREE = 7
};
/* Declension pattern of an adjective; used as the second index into
 * name_czech_patmod. */
enum CzechPattern {
	CZP_JARNI = 0,
	CZP_MLADY = 1,
	CZP_PRIVL = 2
};
/* [CzechGender][CzechPattern] - the character which replaces the last
 * character of the adjective so that it agrees with the gender of the
 * substantive. Only the six real genders have a row; CZG_FREE and CZG_NFREE
 * must be resolved to a real gender before indexing this table. */
// XXX: [CZG_SMASC][CZP_PRIVL] needs special handling: -ovX -> -uv.
// The 'X' entry is only a placeholder for that case.
// NOTE(review): the literals shown as <EFBFBD> look like encoding damage of
// single-byte accented characters - TODO restore from the original file.
static const char name_czech_patmod[6][3] = {
/* CZG_SMASC */ { '<EFBFBD>', '<EFBFBD>', 'X' },
/* CZG_SFEM */ { '<EFBFBD>', '<EFBFBD>', 'a' },
/* CZG_SNEUT */ { '<EFBFBD>', '<EFBFBD>', 'o' },
/* CZG_PMASC */ { '<EFBFBD>', '<EFBFBD>', 'y' },
/* CZG_PFEM */ { '<EFBFBD>', '<EFBFBD>', 'y' },
/* CZG_PNEUT */ { '<EFBFBD>', '<EFBFBD>', 'a' }
};
/* Bit flags through which a substantive stem restricts the endings it may
 * be combined with. An ending is acceptable when it shares at least one of
 * these flags with the stem. */
enum CzechAllow {
	CZA_SHORT  = 1 << 0,
	CZA_MIDDLE = 1 << 1,
	CZA_LONG   = 1 << 2,
	CZA_ALL    = ~0      // every flag set
};
/* Bit flags which must ALL be satisfied, in the stem->others direction:
 * every flag set on the stem has to be set on the adjective/ending too. */
enum CzechChoose {
	CZC_NORMAL    = 1 << 0,
	CZC_COLOR     = 1 << 1,
	CZC_POSTFIX   = 1 << 2, // Matched if postfix was inserted.
	CZC_NOPOSTFIX = 1 << 3, // Matched if no postfix was inserted.
	CZC_ANY       = ~0      // every flag set
};
/* One substantive part: a complete substantive, a stem or an ending. */
struct CzechNameSubst {
// Gender of the part; stems may also use CZG_FREE / CZG_NFREE.
enum CzechGender gender;
// CZA_* flags; a stem and an ending must share at least one of them.
enum CzechAllow allow;
// CZC_* flags; all flags of the stem must be set on the matched item.
enum CzechChoose choose;
// Text of the part as it is appended to the generated name.
const char *name;
};
/* One adjective usable as a prefix of the town name. */
struct CzechNameAdj {
// Declension pattern; selects the column of name_czech_patmod.
enum CzechPattern pattern;
// CZC_* flags; must contain all the flags chosen by the substantive.
enum CzechChoose choose;
// Text of the adjective; its last character gets replaced according to
// the gender of the substantive.
const char *name;
};
// Adjectives used as the name prefix.
// Items which should be more common are listed twice.
// NOTE(review): the <EFBFBD> sequences inside the strings look like encoding
// damage of accented characters - TODO restore from the original file.
static const struct CzechNameAdj name_czech_adj[] = {
{ CZP_JARNI, CZC_ANY, "Horn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Horn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Doln<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Doln<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Predn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Zadn<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Kosteln<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "Havran<EFBFBD>" },
{ CZP_JARNI, CZC_ANY, "R<EFBFBD>cn<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Velk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Velk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Mal<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Mal<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Vysok<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Cesk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Moravsk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Slov<EFBFBD>ck<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Uhersk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Star<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Star<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Nov<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Nov<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Mlad<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Kr<EFBFBD>lovsk<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Kamenn<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Cihlov<EFBFBD>" },
{ CZP_MLADY, CZC_ANY, "Divn<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Cerven<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Cerven<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Zelen<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Zlut<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Siv<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Sed<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "B<EFBFBD>l<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Modr<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Ruzov<EFBFBD>" },
{ CZP_MLADY, CZC_COLOR, "Cern<EFBFBD>" },
{ CZP_PRIVL, CZC_ANY, "Kr<EFBFBD>lova" },
{ CZP_PRIVL, CZC_ANY, "Janova" },
{ CZP_PRIVL, CZC_ANY, "Karlova" },
{ CZP_PRIVL, CZC_ANY, "Jir<EFBFBD>kova" },
{ CZP_PRIVL, CZC_ANY, "Petrova" },
{ CZP_PRIVL, CZC_ANY, "Sudovo" },
};
// Complete substantives, put into the name as they are (no postfix or
// ending is attached). Each one is considered a stem for choose/allow
// matching purposes. Some items are listed twice.
static const struct CzechNameSubst name_czech_subst_full[] = {
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Sedlec" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
{ CZG_SMASC, CZA_ALL, CZC_NORMAL, "<EFBFBD>val" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hora" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
{ CZG_SFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hlava" },
{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Zd<EFBFBD>r" },
{ CZG_PMASC, CZA_ALL, CZC_NORMAL, "<EFBFBD>valy" },
{ CZG_PFEM, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Luka" },
{ CZG_PNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
};
// Substantive stems; an optional postfix and an ending are attached to them.
// The gender may be CZG_FREE / CZG_NFREE, in which case the ending decides it.
// The allow flags limit the usable endings (at least one must be shared),
// the choose flags limit both the endings and the adjectives.
// TODO: More stems needed. --pasky
static const struct CzechNameSubst name_czech_subst_stem[] = {
{ CZG_SMASC, CZA_MIDDLE, CZC_NORMAL | CZC_COLOR, "Kostel" },
{ CZG_SMASC, CZA_MIDDLE, CZC_NORMAL | CZC_COLOR, "Kl<EFBFBD>ster" },
{ CZG_SMASC, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Lhot" },
{ CZG_SFEM, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Lhot" },
{ CZG_SFEM, CZA_SHORT, CZC_NORMAL | CZC_COLOR, "Hur" },
{ CZG_FREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Sedl" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hrad" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Pras" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Baz" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Tes" },
{ CZG_NFREE, CZA_MIDDLE, CZC_NORMAL, "Uz" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Br" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Vod" },
{ CZG_NFREE, CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Jan" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Prach" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Kunr" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Strak" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Vit" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Vys" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Zat" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Zer" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Stred" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Harv" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Pruh" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Tach" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "P<EFBFBD>sn" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jin" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jes" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Jar" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Sok" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Hod" },
{ CZG_NFREE, CZA_LONG, CZC_NORMAL, "Net" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Praz" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Nerat" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL, "Kral" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL | CZC_NOPOSTFIX, "Pan" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Odstred" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Mrat" },
{ CZG_FREE, CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hlav" },
{ CZG_FREE, CZA_SHORT | CZA_MIDDLE, CZC_NORMAL, "Mer" },
};
/* Postfixes which may be put between the stem and the ending of
 * a substantive; every item is a vowel followed by 'v', 'n' or 't'. */
static const char *name_czech_subst_postfix[] = {
	/* a- */
	"av",
	"an",
	"at",
	/* o- */
	"ov",
	"on",
	"ot",
	/* e- */
	"ev",
	"en",
	"et",
};
// Endings attached to the substantive stems (after the optional postfix).
// This array must have both neuter genders at the end! The generator takes
// the first contiguous run of endings acceptable for the gender of the stem,
// so the endings of one gender have to be kept together and a CZG_NFREE stem
// relies on the neuter endings coming only after all the others.
static const struct CzechNameSubst name_czech_subst_ending[] = {
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "ec" },
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "<EFBFBD>n" },
{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ov" },
{ CZG_SMASC, CZA_SHORT | CZA_LONG, CZC_ANY, "kov" },
{ CZG_SMASC, CZA_LONG, CZC_POSTFIX, "<EFBFBD>n" },
{ CZG_SMASC, CZA_LONG, CZC_POSTFIX, "n<EFBFBD>k" },
{ CZG_SFEM, CZA_SHORT, CZC_ANY, "ka" },
{ CZG_SFEM, CZA_MIDDLE, CZC_ANY, "inka" },
{ CZG_SFEM, CZA_MIDDLE, CZC_NOPOSTFIX, "na" },
{ CZG_SFEM, CZA_MIDDLE, CZC_ANY, "n<EFBFBD>" },
{ CZG_SFEM, CZA_LONG, CZC_ANY, "ava" },
{ CZG_PMASC, CZA_LONG, CZC_ANY, "<EFBFBD>ky" },
{ CZG_PMASC, CZA_LONG, CZC_ANY, "upy" },
{ CZG_PFEM, CZA_LONG, CZC_ANY, "avy" },
{ CZG_PFEM, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ice" },
{ CZG_PNEUT, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "na" },
{ CZG_SNEUT, CZA_SHORT | CZA_MIDDLE, CZC_ANY, "no" },
{ CZG_SNEUT, CZA_LONG, CZC_ANY, "iste" },
};
// Suffixes appended after the substantive, separated from it by a space.
// NOTE(review): several strings contain damaged bytes where an accented
// character presumably used to be - TODO restore from the original file.
static const char *name_czech_suffix[] = {
"nad Cydlinou",
"nad Dyj<79>",
"nad Jihlavou",
"nad Labem",
"nad Lesy",
"nad Moravou",
"nad Nisou",
"nad Odrou",
"nad Ostravic<69>",
"nad S<>zavou",
"nad Vltavou",
"pod Pradedem",
"pod Radhostem",
"pod R<>pem",
"pod Snezkou",
"pod Spic<69>kem",
"pod Sedlem",
};
static const char *name_romanian_real[]= { static const char *name_romanian_real[]= {
"Adjud", "Adjud",
"Alba Iulia", "Alba Iulia",