SphinxBase  5prealpha
jsgf.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2007 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #include <string.h>
39 #include <assert.h>
40 
41 #include "sphinxbase/ckd_alloc.h"
42 #include "sphinxbase/strfuncs.h"
43 #include "sphinxbase/hash_table.h"
44 #include "sphinxbase/filename.h"
45 #include "sphinxbase/err.h"
46 #include "sphinxbase/jsgf.h"
47 
48 #include "jsgf_internal.h"
49 #include "jsgf_parser.h"
50 #include "jsgf_scanner.h"
51 
52 extern int yyparse(void *scanner, jsgf_t * jsgf);
53 
61 static int expand_rule(jsgf_t * grammar, jsgf_rule_t * rule,
62  int rule_entry, int rule_exit);
63 
65 jsgf_atom_new(char *name, float weight)
66 {
67  jsgf_atom_t *atom;
68 
69  atom = ckd_calloc(1, sizeof(*atom));
70  atom->name = ckd_salloc(name);
71  atom->weight = weight;
72  return atom;
73 }
74 
75 int
76 jsgf_atom_free(jsgf_atom_t * atom)
77 {
78  if (atom == NULL)
79  return 0;
80  ckd_free(atom->name);
81  ckd_free(atom);
82  return 0;
83 }
84 
85 jsgf_t *
87 {
88  jsgf_t *grammar;
89 
90  grammar = ckd_calloc(1, sizeof(*grammar));
91  /* If this is an imported/subgrammar, then we will share a global
92  * namespace with the parent grammar. */
93  if (parent) {
94  grammar->rules = parent->rules;
95  grammar->imports = parent->imports;
96  grammar->searchpath = parent->searchpath;
97  grammar->parent = parent;
98  }
99  else {
100  grammar->rules = hash_table_new(64, 0);
101  grammar->imports = hash_table_new(16, 0);
102  }
103 
104  return grammar;
105 }
106 
107 void
109 {
110  /* FIXME: Probably should just use refcounting instead. */
111  if (jsgf->parent == NULL) {
112  hash_iter_t *itor;
113  gnode_t *gn;
114 
115  for (itor = hash_table_iter(jsgf->rules); itor;
116  itor = hash_table_iter_next(itor)) {
117  ckd_free((char *) itor->ent->key);
118  jsgf_rule_free((jsgf_rule_t *) itor->ent->val);
119  }
120  hash_table_free(jsgf->rules);
121  for (itor = hash_table_iter(jsgf->imports); itor;
122  itor = hash_table_iter_next(itor)) {
123  ckd_free((char *) itor->ent->key);
124  jsgf_grammar_free((jsgf_t *) itor->ent->val);
125  }
126  hash_table_free(jsgf->imports);
127  for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
128  ckd_free(gnode_ptr(gn));
129  glist_free(jsgf->searchpath);
130  for (gn = jsgf->links; gn; gn = gnode_next(gn))
131  ckd_free(gnode_ptr(gn));
132  glist_free(jsgf->links);
133  }
134  ckd_free(jsgf->name);
135  ckd_free(jsgf->version);
136  ckd_free(jsgf->charset);
137  ckd_free(jsgf->locale);
138  ckd_free(jsgf);
139 }
140 
141 static void
142 jsgf_rhs_free(jsgf_rhs_t * rhs)
143 {
144  gnode_t *gn;
145 
146  if (rhs == NULL)
147  return;
148 
149  jsgf_rhs_free(rhs->alt);
150  for (gn = rhs->atoms; gn; gn = gnode_next(gn))
151  jsgf_atom_free(gnode_ptr(gn));
152  glist_free(rhs->atoms);
153  ckd_free(rhs);
154 }
155 
156 jsgf_atom_t *
157 jsgf_kleene_new(jsgf_t * jsgf, jsgf_atom_t * atom, int plus)
158 {
159  jsgf_rule_t *rule;
160  jsgf_atom_t *rule_atom;
161  jsgf_rhs_t *rhs;
162 
163  /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */
164  /* Or if plus is true, (<name> | <name> <g0006>) */
165  rhs = ckd_calloc(1, sizeof(*rhs));
166  if (plus)
167  rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
168  else
169  rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
170  rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
171  rule_atom = jsgf_atom_new(rule->name, 1.0);
172  rhs = ckd_calloc(1, sizeof(*rhs));
173  rhs->atoms = glist_add_ptr(NULL, rule_atom);
174  rhs->atoms = glist_add_ptr(rhs->atoms, atom);
175  rule->rhs->alt = rhs;
176 
177  return jsgf_atom_new(rule->name, 1.0);
178 }
179 
180 jsgf_rule_t *
181 jsgf_optional_new(jsgf_t * jsgf, jsgf_rhs_t * exp)
182 {
183  jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
184  jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
185  rhs->alt = exp;
186  rhs->atoms = glist_add_ptr(NULL, atom);
187  return jsgf_define_rule(jsgf, NULL, rhs, 0);
188 }
189 
190 void
191 jsgf_add_link(jsgf_t * grammar, jsgf_atom_t * atom, int from, int to)
192 {
193  jsgf_link_t *link;
194 
195  link = ckd_calloc(1, sizeof(*link));
196  link->from = from;
197  link->to = to;
198  link->atom = atom;
199  grammar->links = glist_add_ptr(grammar->links, link);
200 }
201 
202 static char *
203 extract_grammar_name(char *rule_name)
204 {
205  char *dot_pos;
206  char *grammar_name = ckd_salloc(rule_name + 1);
207  if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
208  ckd_free(grammar_name);
209  return NULL;
210  }
211  *dot_pos = '\0';
212  return grammar_name;
213 }
214 
215 char const *
217 {
218  return jsgf->name;
219 }
220 
221 static char *
222 jsgf_fullname(jsgf_t * jsgf, const char *name)
223 {
224  char *fullname;
225 
226  /* Check if it is already qualified */
227  if (strchr(name + 1, '.'))
228  return ckd_salloc(name);
229 
230  /* Skip leading < in name */
231  fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
232  sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
233  return fullname;
234 }
235 
236 static char *
237 jsgf_fullname_from_rule(jsgf_rule_t * rule, const char *name)
238 {
239  char *fullname, *grammar_name;
240 
241  /* Check if it is already qualified */
242  if (strchr(name + 1, '.'))
243  return ckd_salloc(name);
244 
245  /* Skip leading < in name */
246  if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
247  return ckd_salloc(name);
248  fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
249  sprintf(fullname, "<%s.%s", grammar_name, name + 1);
250  ckd_free(grammar_name);
251 
252  return fullname;
253 }
254 
255 /* Extract as rulename everything after the secondlast dot, if existent.
256  * Because everything before the secondlast dot is the path-specification. */
257 static char *
258 importname2rulename(char *importname)
259 {
260  char *rulename = ckd_salloc(importname);
261  char *last_dotpos;
262  char *secondlast_dotpos;
263 
264  if ((last_dotpos = strrchr(rulename + 1, '.')) != NULL) {
265  *last_dotpos = '\0';
266  if ((secondlast_dotpos = strrchr(rulename + 1, '.')) != NULL) {
267  *last_dotpos = '.';
268  *secondlast_dotpos = '<';
269  secondlast_dotpos = ckd_salloc(secondlast_dotpos);
270  ckd_free(rulename);
271  return secondlast_dotpos;
272  }
273  else {
274  *last_dotpos = '.';
275  return rulename;
276  }
277  }
278  else {
279  return rulename;
280  }
281 }
282 
283 #define NO_NODE -1
284 #define RECURSIVE_NODE -2
285 
294 static int
295 expand_rhs(jsgf_t * grammar, jsgf_rule_t * rule, jsgf_rhs_t * rhs,
296  int rule_entry, int rule_exit)
297 {
298  gnode_t *gn;
299  int lastnode;
300 
301  /* Last node expanded in this sequence. */
302  lastnode = rule_entry;
303 
304  /* Iterate over atoms in rhs and generate links/nodes */
305  for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
306  jsgf_atom_t *atom = gnode_ptr(gn);
307 
308  if (jsgf_atom_is_rule(atom)) {
309  jsgf_rule_t *subrule;
310  char *fullname;
311  gnode_t *subnode;
312  jsgf_rule_stack_t *rule_stack_entry = NULL;
313 
314  /* Special case for <NULL> and <VOID> pseudo-rules
315  If this is the only atom in the rhs, and it's the
316  first rhs in the rule, then emit a null transition,
317  creating an exit state if needed. */
318  if (0 == strcmp(atom->name, "<NULL>")) {
319  if (gn == rhs->atoms && gnode_next(gn) == NULL) {
320  if (rule_exit == NO_NODE) {
321  jsgf_add_link(grammar, atom,
322  lastnode, grammar->nstate);
323  rule_exit = lastnode = grammar->nstate;
324  ++grammar->nstate;
325  }
326  else {
327  jsgf_add_link(grammar, atom, lastnode, rule_exit);
328  }
329  }
330  continue;
331  }
332  else if (0 == strcmp(atom->name, "<VOID>")) {
333  /* Make this entire RHS unspeakable */
334  return NO_NODE;
335  }
336 
337  fullname = jsgf_fullname_from_rule(rule, atom->name);
339  (grammar->rules, fullname, (void **) &subrule) == -1) {
340  E_ERROR("Undefined rule in RHS: %s\n", fullname);
341  ckd_free(fullname);
342  return NO_NODE;
343  }
344  ckd_free(fullname);
345 
346  /* Look for this subrule in the stack of expanded rules */
347  for (subnode = grammar->rulestack; subnode;
348  subnode = gnode_next(subnode)) {
349  rule_stack_entry =
350  (jsgf_rule_stack_t *) gnode_ptr(subnode);
351  if (rule_stack_entry->rule == subrule)
352  break;
353  }
354 
355  if (subnode != NULL) {
356  /* Allow right-recursion only. */
357  if (gnode_next(gn) != NULL) {
358  E_ERROR
359  ("Only right-recursion is permitted (in %s.%s)\n",
360  grammar->name, rule->name);
361  return NO_NODE;
362  }
363  /* Add a link back to the beginning of this rule instance */
364  E_INFO("Right recursion %s %d => %d\n", atom->name,
365  lastnode, rule_stack_entry->entry);
366  jsgf_add_link(grammar, atom, lastnode,
367  rule_stack_entry->entry);
368 
369  /* Let our caller know that this rhs didn't reach an
370  end state. */
371  lastnode = RECURSIVE_NODE;
372  }
373  else {
374  /* If this is the last atom in this rhs, link its
375  expansion to the parent rule's exit state.
376  Otherwise, create a new exit state for it. */
377  int subruleexit = NO_NODE;
378  if (gnode_next(gn) == NULL && rule_exit >= 0)
379  subruleexit = rule_exit;
380 
381  /* Expand the subrule */
382  lastnode =
383  expand_rule(grammar, subrule, lastnode, subruleexit);
384 
385  if (lastnode == NO_NODE)
386  return NO_NODE;
387  }
388  }
389  else {
390  /* An exit-state is created if this isn't the last atom
391  in the rhs, or if the containing rule doesn't have an
392  exit state yet.
393  Otherwise, the rhs's exit state becomes the containing
394  rule's exit state. */
395  int exitstate;
396  if (gnode_next(gn) == NULL && rule_exit >= 0) {
397  exitstate = rule_exit;
398  }
399  else {
400  exitstate = grammar->nstate;
401  ++grammar->nstate;
402  }
403 
404  /* Add a link for this token */
405  jsgf_add_link(grammar, atom, lastnode, exitstate);
406  lastnode = exitstate;
407  }
408  }
409 
410  return lastnode;
411 }
412 
413 static int
414 expand_rule(jsgf_t * grammar, jsgf_rule_t * rule, int rule_entry,
415  int rule_exit)
416 {
417  jsgf_rule_stack_t *rule_stack_entry;
418  jsgf_rhs_t *rhs;
419 
420  /* Push this rule onto the stack */
421  rule_stack_entry =
423  rule_stack_entry->rule = rule;
424  rule_stack_entry->entry = rule_entry;
425  grammar->rulestack = glist_add_ptr(grammar->rulestack,
426  rule_stack_entry);
427 
428  for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
429  int lastnode;
430 
431  lastnode = expand_rhs(grammar, rule, rhs, rule_entry, rule_exit);
432 
433  if (lastnode == NO_NODE) {
434  return NO_NODE;
435  }
436  else if (lastnode == RECURSIVE_NODE) {
437  /* The rhs ended with right-recursion, i.e. a transition to
438  an earlier state. Nothing needs to happen at this level. */
439  ;
440  }
441  else if (rule_exit == NO_NODE) {
442  /* If this rule doesn't have an exit state yet, use the exit
443  state of its first right-hand-side.
444  All other right-hand-sides will use this exit state. */
445  assert(lastnode >= 0);
446  rule_exit = lastnode;
447  }
448  }
449 
450  /* If no exit-state was created, use the entry-state. */
451  if (rule_exit == NO_NODE) {
452  rule_exit = rule_entry;
453  }
454 
455  /* Pop this rule from the rule stack */
456  ckd_free(gnode_ptr(grammar->rulestack));
457  grammar->rulestack = gnode_free(grammar->rulestack, NULL);
458 
459  return rule_exit;
460 }
461 
464 {
465  return hash_table_iter(grammar->rules);
466 }
467 
468 jsgf_rule_t *
469 jsgf_get_rule(jsgf_t * grammar, char const *name)
470 {
471  void *val;
472  char *fullname;
473 
474  fullname = string_join("<", name, ">", NULL);
475  if (hash_table_lookup(grammar->rules, fullname, &val) < 0) {
476  ckd_free(fullname);
477  return NULL;
478  }
479  ckd_free(fullname);
480  return (jsgf_rule_t *) val;
481 }
482 
483 jsgf_rule_t *
485 {
486  jsgf_rule_iter_t *itor;
487  jsgf_rule_t *public_rule = NULL;
488 
489  for (itor = jsgf_rule_iter(grammar); itor;
490  itor = jsgf_rule_iter_next(itor)) {
491  jsgf_rule_t *rule = jsgf_rule_iter_rule(itor);
492  if (jsgf_rule_public(rule)) {
493  const char *rule_name = jsgf_rule_name(rule);
494  char *dot_pos;
495  if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) {
496  public_rule = rule;
497  jsgf_rule_iter_free(itor);
498  break;
499  }
500  if (0 ==
501  strncmp(rule_name + 1, jsgf_grammar_name(grammar),
502  dot_pos - rule_name - 1)) {
503  public_rule = rule;
504  jsgf_rule_iter_free(itor);
505  break;
506  }
507  }
508  }
509  return public_rule;
510 }
511 
512 char const *
514 {
515  return rule->name;
516 }
517 
518 int
520 {
521  return rule->is_public;
522 }
523 
524 static fsg_model_t *
525 jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
526  logmath_t * lmath, float32 lw, int do_closure)
527 {
528  fsg_model_t *fsg;
529  glist_t nulls;
530  gnode_t *gn;
531  int rule_entry, rule_exit;
532 
533  if (grammar == NULL || rule == NULL)
534  return NULL;
535 
536  /* Clear previous links */
537  for (gn = grammar->links; gn; gn = gnode_next(gn)) {
538  ckd_free(gnode_ptr(gn));
539  }
540  glist_free(grammar->links);
541  grammar->links = NULL;
542  grammar->nstate = 0;
543 
544  /* Create the top-level entry state, and expand the
545  top-level rule. */
546  rule_entry = grammar->nstate++;
547  rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);
548 
549  /* If no exit-state was created, create one. */
550  if (rule_exit == NO_NODE) {
551  rule_exit = grammar->nstate++;
552  jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
553  }
554 
555  fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
556  fsg->start_state = rule_entry;
557  fsg->final_state = rule_exit;
558  grammar->links = glist_reverse(grammar->links);
559  for (gn = grammar->links; gn; gn = gnode_next(gn)) {
560  jsgf_link_t *link = gnode_ptr(gn);
561 
562  if (link->atom) {
563  if (jsgf_atom_is_rule(link->atom)) {
564  fsg_model_null_trans_add(fsg, link->from, link->to,
565  logmath_log(lmath,
566  link->atom->weight));
567  }
568  else {
569  int wid = fsg_model_word_add(fsg, link->atom->name);
570  fsg_model_trans_add(fsg, link->from, link->to,
571  logmath_log(lmath, link->atom->weight),
572  wid);
573  }
574  }
575  else {
576  fsg_model_null_trans_add(fsg, link->from, link->to, 0);
577  }
578  }
579  if (do_closure) {
580  nulls = fsg_model_null_trans_closure(fsg, NULL);
581  glist_free(nulls);
582  }
583 
584  return fsg;
585 }
586 
587 fsg_model_t *
589  logmath_t * lmath, float32 lw)
590 {
591  return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
592 }
593 
594 fsg_model_t *
596  logmath_t * lmath, float32 lw)
597 {
598  return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
599 }
600 
601 fsg_model_t *
602 jsgf_read_file(const char *file, logmath_t * lmath, float32 lw)
603 {
604  fsg_model_t *fsg;
605  jsgf_rule_t *rule;
606  jsgf_t *jsgf;
607  jsgf_rule_iter_t *itor;
608 
609  if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) {
610  E_ERROR("Error parsing file: %s\n", file);
611  return NULL;
612  }
613 
614  rule = NULL;
615  for (itor = jsgf_rule_iter(jsgf); itor;
616  itor = jsgf_rule_iter_next(itor)) {
617  rule = jsgf_rule_iter_rule(itor);
618  if (jsgf_rule_public(rule)) {
619  jsgf_rule_iter_free(itor);
620  break;
621  }
622  }
623  if (rule == NULL) {
624  E_ERROR("No public rules found in %s\n", file);
625  return NULL;
626  }
627  fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
628  jsgf_grammar_free(jsgf);
629  return fsg;
630 }
631 
632 fsg_model_t *
633 jsgf_read_string(const char *string, logmath_t * lmath, float32 lw)
634 {
635  fsg_model_t *fsg;
636  jsgf_rule_t *rule;
637  jsgf_t *jsgf;
638  jsgf_rule_iter_t *itor;
639 
640  if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) {
641  E_ERROR("Error parsing input string\n");
642  return NULL;
643  }
644 
645  rule = NULL;
646  for (itor = jsgf_rule_iter(jsgf); itor;
647  itor = jsgf_rule_iter_next(itor)) {
648  rule = jsgf_rule_iter_rule(itor);
649  if (jsgf_rule_public(rule)) {
650  jsgf_rule_iter_free(itor);
651  break;
652  }
653  }
654  if (rule == NULL) {
655  jsgf_grammar_free(jsgf);
656  E_ERROR("No public rules found in input string\n");
657  return NULL;
658  }
659  fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
660  jsgf_grammar_free(jsgf);
661  return fsg;
662 }
663 
664 
665 int
666 jsgf_write_fsg(jsgf_t * grammar, jsgf_rule_t * rule, FILE * outfh)
667 {
668  fsg_model_t *fsg;
669  logmath_t *lmath = logmath_init(1.0001, 0, 0);
670 
671  if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL)
672  goto error_out;
673 
674  fsg_model_write(fsg, outfh);
675  logmath_free(lmath);
676  return 0;
677 
678  error_out:
679  logmath_free(lmath);
680  return -1;
681 }
682 
683 jsgf_rule_t *
684 jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs,
685  int is_public)
686 {
687  jsgf_rule_t *rule;
688  void *val;
689 
690  if (name == NULL) {
691  name = ckd_malloc(strlen(jsgf->name) + 16);
692  sprintf(name, "<%s.g%05d>", jsgf->name,
693  hash_table_inuse(jsgf->rules));
694  }
695  else {
696  char *newname;
697 
698  newname = jsgf_fullname(jsgf, name);
699  name = newname;
700  }
701 
702  rule = ckd_calloc(1, sizeof(*rule));
703  rule->refcnt = 1;
704  rule->name = ckd_salloc(name);
705  rule->rhs = rhs;
706  rule->is_public = is_public;
707 
708  E_INFO("Defined rule: %s%s\n",
709  rule->is_public ? "PUBLIC " : "", rule->name);
710  val = hash_table_enter(jsgf->rules, name, rule);
711  if (val != (void *) rule) {
712  E_WARN("Multiply defined symbol: %s\n", name);
713  }
714  return rule;
715 }
716 
717 jsgf_rule_t *
718 jsgf_rule_retain(jsgf_rule_t * rule)
719 {
720  ++rule->refcnt;
721  return rule;
722 }
723 
724 int
725 jsgf_rule_free(jsgf_rule_t * rule)
726 {
727  if (rule == NULL)
728  return 0;
729  if (--rule->refcnt > 0)
730  return rule->refcnt;
731  jsgf_rhs_free(rule->rhs);
732  ckd_free(rule->name);
733  ckd_free(rule);
734  return 0;
735 }
736 
737 
738 /* FIXME: This should go in libsphinxutil */
739 static char *
740 path_list_search(glist_t paths, char *path)
741 {
742  gnode_t *gn;
743 
744  for (gn = paths; gn; gn = gnode_next(gn)) {
745  char *fullpath;
746  FILE *tmp;
747 
748  fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
749  tmp = fopen(fullpath, "r");
750  if (tmp != NULL) {
751  fclose(tmp);
752  return fullpath;
753  }
754  else {
755  ckd_free(fullpath);
756  }
757  }
758  return NULL;
759 }
760 
761 jsgf_rule_t *
762 jsgf_import_rule(jsgf_t * jsgf, char *name)
763 {
764  char *c, *path, *newpath;
765  size_t namelen, packlen;
766  void *val;
767  jsgf_t *imp;
768  int import_all;
769 
770  /* Trim the leading and trailing <> */
771  namelen = strlen(name);
772  path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
773  strcpy(path, name + 1);
774  /* Split off the first part of the name */
775  c = strrchr(path, '.');
776  if (c == NULL) {
777  E_ERROR("Imported rule is not qualified: %s\n", name);
778  ckd_free(path);
779  return NULL;
780  }
781  packlen = c - path;
782  *c = '\0';
783 
784  /* Look for import foo.* */
785  import_all = (strlen(name) > 2
786  && 0 == strcmp(name + namelen - 3, ".*>"));
787 
788  /* Construct a filename. */
789  for (c = path; *c; ++c)
790  if (*c == '.')
791  *c = '/';
792  strcat(path, ".gram");
793  newpath = path_list_search(jsgf->searchpath, path);
794  if (newpath == NULL) {
795  E_ERROR("Failed to find grammar %s\n", path);
796  ckd_free(path);
797  return NULL;
798  }
799  ckd_free(path);
800 
801  path = newpath;
802  E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
803 
804  /* FIXME: Also, we need to make sure that path is fully qualified
805  * here, by adding any prefixes from jsgf->name to it. */
806  /* See if we have parsed it already */
807  if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
808  E_INFO("Already imported %s\n", path);
809  imp = val;
810  ckd_free(path);
811  }
812  else {
813  /* If not, parse it. */
814  imp = jsgf_parse_file(path, jsgf);
815  val = hash_table_enter(jsgf->imports, path, imp);
816  if (val != (void *) imp) {
817  E_WARN("Multiply imported file: %s\n", path);
818  }
819  }
820  if (imp != NULL) {
821  hash_iter_t *itor;
822  /* Look for public rules matching rulename. */
823  for (itor = hash_table_iter(imp->rules); itor;
824  itor = hash_table_iter_next(itor)) {
825  hash_entry_t *he = itor->ent;
826  jsgf_rule_t *rule = hash_entry_val(he);
827  int rule_matches;
828  char *rule_name = importname2rulename(name);
829 
830  if (import_all) {
831  /* Match package name (symbol table is shared) */
832  rule_matches =
833  !strncmp(rule_name, rule->name, packlen + 1);
834  }
835  else {
836  /* Exact match */
837  rule_matches = !strcmp(rule_name, rule->name);
838  }
839  ckd_free(rule_name);
840  if (rule->is_public && rule_matches) {
841  void *val;
842  char *newname;
843 
844  /* Link this rule into the current namespace. */
845  c = strrchr(rule->name, '.');
846  assert(c != NULL);
847  newname = jsgf_fullname(jsgf, c);
848 
849  E_INFO("Imported %s\n", newname);
850  val = hash_table_enter(jsgf->rules, newname,
851  jsgf_rule_retain(rule));
852  if (val != (void *) rule) {
853  E_WARN("Multiply defined symbol: %s\n", newname);
854  }
855  if (!import_all) {
856  hash_table_iter_free(itor);
857  return rule;
858  }
859  }
860  }
861  }
862 
863  return NULL;
864 }
865 
866 static void
867 jsgf_set_search_path(jsgf_t * jsgf, const char *filename)
868 {
869  char *jsgf_path;
870 
871 #if !defined(_WIN32_WCE)
872  if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
873  char *word, *c;
874  /* FIXME: This should be a function in libsphinxbase. */
875  word = jsgf_path = ckd_salloc(jsgf_path);
876  while ((c = strchr(word, ':'))) {
877  *c = '\0';
878  jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
879  word = c + 1;
880  }
881  jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
882  jsgf->searchpath = glist_reverse(jsgf->searchpath);
883  return;
884  }
885 #endif
886 
887  if (!filename) {
888  jsgf->searchpath =
889  glist_add_ptr(jsgf->searchpath, ckd_salloc("."));
890  return;
891  }
892 
893  jsgf_path = ckd_salloc(filename);
894  path2dirname(filename, jsgf_path);
895  jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path);
896 }
897 
898 jsgf_t *
899 jsgf_parse_file(const char *filename, jsgf_t * parent)
900 {
901  yyscan_t yyscanner;
902  jsgf_t *jsgf;
903  int yyrv;
904  FILE *in = NULL;
905 
906  yylex_init(&yyscanner);
907  if (filename == NULL) {
908  yyset_in(stdin, yyscanner);
909  }
910  else {
911  in = fopen(filename, "r");
912  if (in == NULL) {
913  E_ERROR_SYSTEM("Failed to open %s for parsing", filename);
914  return NULL;
915  }
916  yyset_in(in, yyscanner);
917  }
918 
919  jsgf = jsgf_grammar_new(parent);
920 
921  if (!parent)
922  jsgf_set_search_path(jsgf, filename);
923 
924  yyrv = yyparse(yyscanner, jsgf);
925  if (yyrv != 0) {
926  E_ERROR("Failed to parse JSGF grammar from '%s'\n",
927  filename ? filename : "(stdin)");
928  jsgf_grammar_free(jsgf);
929  yylex_destroy(yyscanner);
930  return NULL;
931  }
932  if (in)
933  fclose(in);
934  yylex_destroy(yyscanner);
935 
936  return jsgf;
937 }
938 
939 jsgf_t *
940 jsgf_parse_string(const char *string, jsgf_t * parent)
941 {
942  yyscan_t yyscanner;
943  jsgf_t *jsgf;
944  int yyrv;
945  YY_BUFFER_STATE buf;
946 
947  yylex_init(&yyscanner);
948  buf = yy_scan_string(string, yyscanner);
949 
950  jsgf = jsgf_grammar_new(parent);
951  if (!parent)
952  jsgf_set_search_path(jsgf, NULL);
953 
954  yyrv = yyparse(yyscanner, jsgf);
955  if (yyrv != 0) {
956  E_ERROR("Failed to parse JSGF grammar from input string\n");
957  jsgf_grammar_free(jsgf);
958  yy_delete_buffer(buf, yyscanner);
959  yylex_destroy(yyscanner);
960  return NULL;
961  }
962  yy_delete_buffer(buf, yyscanner);
963  yylex_destroy(yyscanner);
964 
965  return jsgf;
966 }
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition: ckd_alloc.h:253
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
File names related operation.
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off filename from the given path and copy the directory name into dir Caller must have allocate...
Definition: filename.c:68
SPHINXBASE_EXPORT glist_t glist_reverse(glist_t g)
Reverse the order of the given glist.
Definition: glist.c:169
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
Definition: glist.c:133
SPHINXBASE_EXPORT gnode_t * gnode_free(gnode_t *gn, gnode_t *pred)
Free the given node, gn, of a glist, pred being its predecessor in the list.
Definition: glist.c:257
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
Hash table implementation.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:688
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:682
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
Definition: hash_table.c:302
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:646
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:656
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
Definition: hash_table.c:501
void jsgf_grammar_free(jsgf_t *jsgf)
Free a JSGF grammar.
Definition: jsgf.c:108
fsg_model_t * jsgf_read_string(const char *string, logmath_t *lmath, float32 lw)
Read JSGF from string and return FSG object from it.
Definition: jsgf.c:633
jsgf_rule_t * jsgf_get_rule(jsgf_t *grammar, char const *name)
Get a rule by name from a grammar.
Definition: jsgf.c:469
fsg_model_t * jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition: jsgf.c:588
jsgf_t * jsgf_grammar_new(jsgf_t *parent)
Create a new JSGF grammar.
Definition: jsgf.c:86
jsgf_t * jsgf_parse_file(const char *filename, jsgf_t *parent)
Parse a JSGF grammar from a file.
Definition: jsgf.c:899
int jsgf_rule_public(jsgf_rule_t *rule)
Test if a rule is public or not.
Definition: jsgf.c:519
char const * jsgf_grammar_name(jsgf_t *jsgf)
Get the grammar name from the file.
Definition: jsgf.c:216
fsg_model_t * jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition: jsgf.c:595
char const * jsgf_rule_name(jsgf_rule_t *rule)
Get the rule name from a rule.
Definition: jsgf.c:513
jsgf_rule_t * jsgf_get_public_rule(jsgf_t *grammar)
Returns the first public rule of the grammar.
Definition: jsgf.c:484
int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
Convert a JSGF rule to Sphinx FSG text form.
Definition: jsgf.c:666
jsgf_t * jsgf_parse_string(const char *string, jsgf_t *parent)
Parse a JSGF grammar from a string.
Definition: jsgf.c:940
fsg_model_t * jsgf_read_file(const char *file, logmath_t *lmath, float32 lw)
Read JSGF from file and return FSG object from it.
Definition: jsgf.c:602
jsgf_rule_iter_t * jsgf_rule_iter(jsgf_t *grammar)
Get an iterator over all rules in a grammar.
Definition: jsgf.c:463
JSGF grammar compiler.
#define jsgf_rule_iter_rule(itor)
Get the current rule in a rule iterator.
Definition: jsgf.h:127
#define jsgf_rule_iter_free(itor)
Free a rule iterator (if the end hasn't been reached).
Definition: jsgf.h:132
#define jsgf_rule_iter_next(itor)
Advance an iterator to the next rule in the grammar.
Definition: jsgf.h:122
Internal definitions for JSGF grammar compiler.
SPHINXBASE_EXPORT logmath_t * logmath_init(float64 base, int shift, int use_table)
Initialize a log math computation table.
Definition: logmath.c:62
SPHINXBASE_EXPORT int logmath_free(logmath_t *lmath)
Free a log table.
Definition: logmath.c:342
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
Miscellaneous useful string functions.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string.
Definition: strfuncs.c:70
Word level FSG definition.
Definition: fsg_model.h:99
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:109
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:110
A node in a generic list.
Definition: glist.h:100
A note by ARCHAN at 20050510: Technically what we use is so-called "hash table with buckets" which is...
Definition: hash_table.h:149
void * val
Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...
Definition: hash_table.h:155
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
float weight
Weight (default 1)
char * name
Rule or token name.
glist_t atoms
Sequence of items.
jsgf_rhs_t * alt
Linked list of alternates.
char * name
Rule name (NULL for an alternation/grouping)
int is_public
Is this rule marked 'public'?
jsgf_rhs_t * rhs
Expansion.
int refcnt
Reference count.
Definition: jsgf_internal.h:99
int entry
The entry-state for this expansion.
Definition: jsgf_internal.h:95
jsgf_rule_t * rule
The rule being expanded.
Definition: jsgf_internal.h:94
char * locale
JSGF locale (default C)
Definition: jsgf_internal.h:78
glist_t rulestack
Stack of currently expanded rules.
Definition: jsgf_internal.h:89
int nstate
Number of generated states.
Definition: jsgf_internal.h:87
glist_t links
Generated FSG links.
Definition: jsgf_internal.h:88
hash_table_t * imports
Pointers to imported grammars.
Definition: jsgf_internal.h:82
glist_t searchpath
List of directories to search for grammars.
Definition: jsgf_internal.h:84
char * name
Grammar name.
Definition: jsgf_internal.h:79
char * charset
JSGF charset (default UTF-8)
Definition: jsgf_internal.h:77
char * version
JSGF version (from header)
Definition: jsgf_internal.h:76
jsgf_t * parent
Parent grammar (if this is an imported one)
Definition: jsgf_internal.h:83
hash_table_t * rules
Defined or imported rules in this grammar.
Definition: jsgf_internal.h:81