SphinxBase 5prealpha
jsgf.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2007 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#include <string.h>
39#include <assert.h>
40
42#include "sphinxbase/strfuncs.h"
44#include "sphinxbase/filename.h"
45#include "sphinxbase/err.h"
46#include "sphinxbase/jsgf.h"
47
48#include "jsgf_internal.h"
49#include "jsgf_parser.h"
50#include "jsgf_scanner.h"
51
52extern int yyparse(void *scanner, jsgf_t * jsgf);
53
61static int expand_rule(jsgf_t * grammar, jsgf_rule_t * rule,
62 int rule_entry, int rule_exit);
63
65jsgf_atom_new(char *name, float weight)
66{
67 jsgf_atom_t *atom;
68
69 atom = ckd_calloc(1, sizeof(*atom));
70 atom->name = ckd_salloc(name);
71 atom->weight = weight;
72 return atom;
73}
74
75int
76jsgf_atom_free(jsgf_atom_t * atom)
77{
78 if (atom == NULL)
79 return 0;
80 ckd_free(atom->name);
81 ckd_free(atom);
82 return 0;
83}
84
85jsgf_t *
87{
88 jsgf_t *grammar;
89
90 grammar = ckd_calloc(1, sizeof(*grammar));
91 /* If this is an imported/subgrammar, then we will share a global
92 * namespace with the parent grammar. */
93 if (parent) {
94 grammar->rules = parent->rules;
95 grammar->imports = parent->imports;
96 grammar->searchpath = parent->searchpath;
97 grammar->parent = parent;
98 }
99 else {
100 grammar->rules = hash_table_new(64, 0);
101 grammar->imports = hash_table_new(16, 0);
102 }
103
104 return grammar;
105}
106
107void
109{
110 /* FIXME: Probably should just use refcounting instead. */
111 if (jsgf->parent == NULL) {
112 hash_iter_t *itor;
113 gnode_t *gn;
114
115 for (itor = hash_table_iter(jsgf->rules); itor;
116 itor = hash_table_iter_next(itor)) {
117 ckd_free((char *) itor->ent->key);
118 jsgf_rule_free((jsgf_rule_t *) itor->ent->val);
119 }
120 hash_table_free(jsgf->rules);
121 for (itor = hash_table_iter(jsgf->imports); itor;
122 itor = hash_table_iter_next(itor)) {
123 ckd_free((char *) itor->ent->key);
124 jsgf_grammar_free((jsgf_t *) itor->ent->val);
125 }
127 for (gn = jsgf->searchpath; gn; gn = gnode_next(gn))
128 ckd_free(gnode_ptr(gn));
129 glist_free(jsgf->searchpath);
130 for (gn = jsgf->links; gn; gn = gnode_next(gn))
131 ckd_free(gnode_ptr(gn));
132 glist_free(jsgf->links);
133 }
134 ckd_free(jsgf->name);
135 ckd_free(jsgf->version);
136 ckd_free(jsgf->charset);
137 ckd_free(jsgf->locale);
138 ckd_free(jsgf);
139}
140
141static void
142jsgf_rhs_free(jsgf_rhs_t * rhs)
143{
144 gnode_t *gn;
145
146 if (rhs == NULL)
147 return;
148
149 jsgf_rhs_free(rhs->alt);
150 for (gn = rhs->atoms; gn; gn = gnode_next(gn))
151 jsgf_atom_free(gnode_ptr(gn));
152 glist_free(rhs->atoms);
153 ckd_free(rhs);
154}
155
157jsgf_kleene_new(jsgf_t * jsgf, jsgf_atom_t * atom, int plus)
158{
159 jsgf_rule_t *rule;
160 jsgf_atom_t *rule_atom;
161 jsgf_rhs_t *rhs;
162
163 /* Generate an "internal" rule of the form (<NULL> | <name> <g0006>) */
164 /* Or if plus is true, (<name> | <name> <g0006>) */
165 rhs = ckd_calloc(1, sizeof(*rhs));
166 if (plus)
167 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0));
168 else
169 rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("<NULL>", 1.0));
170 rule = jsgf_define_rule(jsgf, NULL, rhs, 0);
171 rule_atom = jsgf_atom_new(rule->name, 1.0);
172 rhs = ckd_calloc(1, sizeof(*rhs));
173 rhs->atoms = glist_add_ptr(NULL, rule_atom);
174 rhs->atoms = glist_add_ptr(rhs->atoms, atom);
175 rule->rhs->alt = rhs;
176
177 return jsgf_atom_new(rule->name, 1.0);
178}
179
181jsgf_optional_new(jsgf_t * jsgf, jsgf_rhs_t * exp)
182{
183 jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs));
184 jsgf_atom_t *atom = jsgf_atom_new("<NULL>", 1.0);
185 rhs->alt = exp;
186 rhs->atoms = glist_add_ptr(NULL, atom);
187 return jsgf_define_rule(jsgf, NULL, rhs, 0);
188}
189
190void
191jsgf_add_link(jsgf_t * grammar, jsgf_atom_t * atom, int from, int to)
192{
193 jsgf_link_t *link;
194
195 link = ckd_calloc(1, sizeof(*link));
196 link->from = from;
197 link->to = to;
198 link->atom = atom;
199 grammar->links = glist_add_ptr(grammar->links, link);
200}
201
202static char *
203extract_grammar_name(char *rule_name)
204{
205 char *dot_pos;
206 char *grammar_name = ckd_salloc(rule_name + 1);
207 if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) {
208 ckd_free(grammar_name);
209 return NULL;
210 }
211 *dot_pos = '\0';
212 return grammar_name;
213}
214
215char const *
217{
218 return jsgf->name;
219}
220
221static char *
222jsgf_fullname(jsgf_t * jsgf, const char *name)
223{
224 char *fullname;
225
226 /* Check if it is already qualified */
227 if (strchr(name + 1, '.'))
228 return ckd_salloc(name);
229
230 /* Skip leading < in name */
231 fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4);
232 sprintf(fullname, "<%s.%s", jsgf->name, name + 1);
233 return fullname;
234}
235
236static char *
237jsgf_fullname_from_rule(jsgf_rule_t * rule, const char *name)
238{
239 char *fullname, *grammar_name;
240
241 /* Check if it is already qualified */
242 if (strchr(name + 1, '.'))
243 return ckd_salloc(name);
244
245 /* Skip leading < in name */
246 if ((grammar_name = extract_grammar_name(rule->name)) == NULL)
247 return ckd_salloc(name);
248 fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4);
249 sprintf(fullname, "<%s.%s", grammar_name, name + 1);
250 ckd_free(grammar_name);
251
252 return fullname;
253}
254
255/* Extract as rulename everything after the secondlast dot, if existent.
256 * Because everything before the secondlast dot is the path-specification. */
257static char *
258importname2rulename(char *importname)
259{
260 char *rulename = ckd_salloc(importname);
261 char *last_dotpos;
262 char *secondlast_dotpos;
263
264 if ((last_dotpos = strrchr(rulename + 1, '.')) != NULL) {
265 *last_dotpos = '\0';
266 if ((secondlast_dotpos = strrchr(rulename + 1, '.')) != NULL) {
267 *last_dotpos = '.';
268 *secondlast_dotpos = '<';
269 secondlast_dotpos = ckd_salloc(secondlast_dotpos);
270 ckd_free(rulename);
271 return secondlast_dotpos;
272 }
273 else {
274 *last_dotpos = '.';
275 return rulename;
276 }
277 }
278 else {
279 return rulename;
280 }
281}
282
283#define NO_NODE -1
284#define RECURSIVE_NODE -2
285
294static int
295expand_rhs(jsgf_t * grammar, jsgf_rule_t * rule, jsgf_rhs_t * rhs,
296 int rule_entry, int rule_exit)
297{
298 gnode_t *gn;
299 int lastnode;
300
301 /* Last node expanded in this sequence. */
302 lastnode = rule_entry;
303
304 /* Iterate over atoms in rhs and generate links/nodes */
305 for (gn = rhs->atoms; gn; gn = gnode_next(gn)) {
306 jsgf_atom_t *atom = gnode_ptr(gn);
307
308 if (jsgf_atom_is_rule(atom)) {
309 jsgf_rule_t *subrule;
310 char *fullname;
311 gnode_t *subnode;
312 jsgf_rule_stack_t *rule_stack_entry = NULL;
313
314 /* Special case for <NULL> and <VOID> pseudo-rules
315 If this is the only atom in the rhs, and it's the
316 first rhs in the rule, then emit a null transition,
317 creating an exit state if needed. */
318 if (0 == strcmp(atom->name, "<NULL>")) {
319 if (gn == rhs->atoms && gnode_next(gn) == NULL) {
320 if (rule_exit == NO_NODE) {
321 jsgf_add_link(grammar, atom,
322 lastnode, grammar->nstate);
323 rule_exit = lastnode = grammar->nstate;
324 ++grammar->nstate;
325 }
326 else {
327 jsgf_add_link(grammar, atom, lastnode, rule_exit);
328 }
329 }
330 continue;
331 }
332 else if (0 == strcmp(atom->name, "<VOID>")) {
333 /* Make this entire RHS unspeakable */
334 return NO_NODE;
335 }
336
337 fullname = jsgf_fullname_from_rule(rule, atom->name);
339 (grammar->rules, fullname, (void **) &subrule) == -1) {
340 E_ERROR("Undefined rule in RHS: %s\n", fullname);
341 ckd_free(fullname);
342 return NO_NODE;
343 }
344 ckd_free(fullname);
345
346 /* Look for this subrule in the stack of expanded rules */
347 for (subnode = grammar->rulestack; subnode;
348 subnode = gnode_next(subnode)) {
349 rule_stack_entry =
350 (jsgf_rule_stack_t *) gnode_ptr(subnode);
351 if (rule_stack_entry->rule == subrule)
352 break;
353 }
354
355 if (subnode != NULL) {
356 /* Allow right-recursion only. */
357 if (gnode_next(gn) != NULL) {
358 E_ERROR
359 ("Only right-recursion is permitted (in %s.%s)\n",
360 grammar->name, rule->name);
361 return NO_NODE;
362 }
363 /* Add a link back to the beginning of this rule instance */
364 E_INFO("Right recursion %s %d => %d\n", atom->name,
365 lastnode, rule_stack_entry->entry);
366 jsgf_add_link(grammar, atom, lastnode,
367 rule_stack_entry->entry);
368
369 /* Let our caller know that this rhs didn't reach an
370 end state. */
371 lastnode = RECURSIVE_NODE;
372 }
373 else {
374 /* If this is the last atom in this rhs, link its
375 expansion to the parent rule's exit state.
376 Otherwise, create a new exit state for it. */
377 int subruleexit = NO_NODE;
378 if (gnode_next(gn) == NULL && rule_exit >= 0)
379 subruleexit = rule_exit;
380
381 /* Expand the subrule */
382 lastnode =
383 expand_rule(grammar, subrule, lastnode, subruleexit);
384
385 if (lastnode == NO_NODE)
386 return NO_NODE;
387 }
388 }
389 else {
390 /* An exit-state is created if this isn't the last atom
391 in the rhs, or if the containing rule doesn't have an
392 exit state yet.
393 Otherwise, the rhs's exit state becomes the containing
394 rule's exit state. */
395 int exitstate;
396 if (gnode_next(gn) == NULL && rule_exit >= 0) {
397 exitstate = rule_exit;
398 }
399 else {
400 exitstate = grammar->nstate;
401 ++grammar->nstate;
402 }
403
404 /* Add a link for this token */
405 jsgf_add_link(grammar, atom, lastnode, exitstate);
406 lastnode = exitstate;
407 }
408 }
409
410 return lastnode;
411}
412
413static int
414expand_rule(jsgf_t * grammar, jsgf_rule_t * rule, int rule_entry,
415 int rule_exit)
416{
417 jsgf_rule_stack_t *rule_stack_entry;
418 jsgf_rhs_t *rhs;
419
420 /* Push this rule onto the stack */
421 rule_stack_entry =
423 rule_stack_entry->rule = rule;
424 rule_stack_entry->entry = rule_entry;
425 grammar->rulestack = glist_add_ptr(grammar->rulestack,
426 rule_stack_entry);
427
428 for (rhs = rule->rhs; rhs; rhs = rhs->alt) {
429 int lastnode;
430
431 lastnode = expand_rhs(grammar, rule, rhs, rule_entry, rule_exit);
432
433 if (lastnode == NO_NODE) {
434 return NO_NODE;
435 }
436 else if (lastnode == RECURSIVE_NODE) {
437 /* The rhs ended with right-recursion, i.e. a transition to
438 an earlier state. Nothing needs to happen at this level. */
439 ;
440 }
441 else if (rule_exit == NO_NODE) {
442 /* If this rule doesn't have an exit state yet, use the exit
443 state of its first right-hand-side.
444 All other right-hand-sides will use this exit state. */
445 assert(lastnode >= 0);
446 rule_exit = lastnode;
447 }
448 }
449
450 /* If no exit-state was created, use the entry-state. */
451 if (rule_exit == NO_NODE) {
452 rule_exit = rule_entry;
453 }
454
455 /* Pop this rule from the rule stack */
456 ckd_free(gnode_ptr(grammar->rulestack));
457 grammar->rulestack = gnode_free(grammar->rulestack, NULL);
458
459 return rule_exit;
460}
461
464{
465 return hash_table_iter(grammar->rules);
466}
467
469jsgf_get_rule(jsgf_t * grammar, char const *name)
470{
471 void *val;
472 char *fullname;
473
474 fullname = string_join("<", name, ">", NULL);
475 if (hash_table_lookup(grammar->rules, fullname, &val) < 0) {
476 ckd_free(fullname);
477 return NULL;
478 }
479 ckd_free(fullname);
480 return (jsgf_rule_t *) val;
481}
482
485{
486 jsgf_rule_iter_t *itor;
487 jsgf_rule_t *public_rule = NULL;
488
489 for (itor = jsgf_rule_iter(grammar); itor;
490 itor = jsgf_rule_iter_next(itor)) {
491 jsgf_rule_t *rule = jsgf_rule_iter_rule(itor);
492 if (jsgf_rule_public(rule)) {
493 const char *rule_name = jsgf_rule_name(rule);
494 char *dot_pos;
495 if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) {
496 public_rule = rule;
498 break;
499 }
500 if (0 ==
501 strncmp(rule_name + 1, jsgf_grammar_name(grammar),
502 dot_pos - rule_name - 1)) {
503 public_rule = rule;
505 break;
506 }
507 }
508 }
509 return public_rule;
510}
511
512char const *
514{
515 return rule->name;
516}
517
518int
520{
521 return rule->is_public;
522}
523
524static fsg_model_t *
525jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
526 logmath_t * lmath, float32 lw, int do_closure)
527{
528 fsg_model_t *fsg;
529 glist_t nulls;
530 gnode_t *gn;
531 int rule_entry, rule_exit;
532
533 if (grammar == NULL || rule == NULL)
534 return NULL;
535
536 /* Clear previous links */
537 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
538 ckd_free(gnode_ptr(gn));
539 }
540 glist_free(grammar->links);
541 grammar->links = NULL;
542 grammar->nstate = 0;
543
544 /* Create the top-level entry state, and expand the
545 top-level rule. */
546 rule_entry = grammar->nstate++;
547 rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);
548
549 /* If no exit-state was created, create one. */
550 if (rule_exit == NO_NODE) {
551 rule_exit = grammar->nstate++;
552 jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
553 }
554
555 fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
556 fsg->start_state = rule_entry;
557 fsg->final_state = rule_exit;
558 grammar->links = glist_reverse(grammar->links);
559 for (gn = grammar->links; gn; gn = gnode_next(gn)) {
560 jsgf_link_t *link = gnode_ptr(gn);
561
562 if (link->atom) {
563 if (jsgf_atom_is_rule(link->atom)) {
564 fsg_model_null_trans_add(fsg, link->from, link->to,
565 logmath_log(lmath,
566 link->atom->weight));
567 }
568 else {
569 int wid = fsg_model_word_add(fsg, link->atom->name);
570 fsg_model_trans_add(fsg, link->from, link->to,
571 logmath_log(lmath, link->atom->weight),
572 wid);
573 }
574 }
575 else {
576 fsg_model_null_trans_add(fsg, link->from, link->to, 0);
577 }
578 }
579 if (do_closure) {
580 nulls = fsg_model_null_trans_closure(fsg, NULL);
581 glist_free(nulls);
582 }
583
584 return fsg;
585}
586
589 logmath_t * lmath, float32 lw)
590{
591 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE);
592}
593
596 logmath_t * lmath, float32 lw)
597{
598 return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE);
599}
600
602jsgf_read_file(const char *file, logmath_t * lmath, float32 lw)
603{
604 fsg_model_t *fsg;
605 jsgf_rule_t *rule;
606 jsgf_t *jsgf;
607 jsgf_rule_iter_t *itor;
608
609 if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) {
610 E_ERROR("Error parsing file: %s\n", file);
611 return NULL;
612 }
613
614 rule = NULL;
615 for (itor = jsgf_rule_iter(jsgf); itor;
616 itor = jsgf_rule_iter_next(itor)) {
617 rule = jsgf_rule_iter_rule(itor);
618 if (jsgf_rule_public(rule)) {
620 break;
621 }
622 }
623 if (rule == NULL) {
624 E_ERROR("No public rules found in %s\n", file);
625 return NULL;
626 }
627 fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
628 jsgf_grammar_free(jsgf);
629 return fsg;
630}
631
633jsgf_read_string(const char *string, logmath_t * lmath, float32 lw)
634{
635 fsg_model_t *fsg;
636 jsgf_rule_t *rule;
637 jsgf_t *jsgf;
638 jsgf_rule_iter_t *itor;
639
640 if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) {
641 E_ERROR("Error parsing input string\n");
642 return NULL;
643 }
644
645 rule = NULL;
646 for (itor = jsgf_rule_iter(jsgf); itor;
647 itor = jsgf_rule_iter_next(itor)) {
648 rule = jsgf_rule_iter_rule(itor);
649 if (jsgf_rule_public(rule)) {
651 break;
652 }
653 }
654 if (rule == NULL) {
655 jsgf_grammar_free(jsgf);
656 E_ERROR("No public rules found in input string\n");
657 return NULL;
658 }
659 fsg = jsgf_build_fsg(jsgf, rule, lmath, lw);
660 jsgf_grammar_free(jsgf);
661 return fsg;
662}
663
664
665int
666jsgf_write_fsg(jsgf_t * grammar, jsgf_rule_t * rule, FILE * outfh)
667{
668 fsg_model_t *fsg;
669 logmath_t *lmath = logmath_init(1.0001, 0, 0);
670
671 if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL)
672 goto error_out;
673
674 fsg_model_write(fsg, outfh);
675 logmath_free(lmath);
676 return 0;
677
678 error_out:
679 logmath_free(lmath);
680 return -1;
681}
682
684jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs,
685 int is_public)
686{
687 jsgf_rule_t *rule;
688 void *val;
689
690 if (name == NULL) {
691 name = ckd_malloc(strlen(jsgf->name) + 16);
692 sprintf(name, "<%s.g%05d>", jsgf->name,
693 hash_table_inuse(jsgf->rules));
694 }
695 else {
696 char *newname;
697
698 newname = jsgf_fullname(jsgf, name);
699 name = newname;
700 }
701
702 rule = ckd_calloc(1, sizeof(*rule));
703 rule->refcnt = 1;
704 rule->name = ckd_salloc(name);
705 rule->rhs = rhs;
706 rule->is_public = is_public;
707
708 E_INFO("Defined rule: %s%s\n",
709 rule->is_public ? "PUBLIC " : "", rule->name);
710 val = hash_table_enter(jsgf->rules, name, rule);
711 if (val != (void *) rule) {
712 E_WARN("Multiply defined symbol: %s\n", name);
713 }
714 return rule;
715}
716
718jsgf_rule_retain(jsgf_rule_t * rule)
719{
720 ++rule->refcnt;
721 return rule;
722}
723
724int
725jsgf_rule_free(jsgf_rule_t * rule)
726{
727 if (rule == NULL)
728 return 0;
729 if (--rule->refcnt > 0)
730 return rule->refcnt;
731 jsgf_rhs_free(rule->rhs);
732 ckd_free(rule->name);
733 ckd_free(rule);
734 return 0;
735}
736
737
738/* FIXME: This should go in libsphinxutil */
739static char *
740path_list_search(glist_t paths, char *path)
741{
742 gnode_t *gn;
743
744 for (gn = paths; gn; gn = gnode_next(gn)) {
745 char *fullpath;
746 FILE *tmp;
747
748 fullpath = string_join(gnode_ptr(gn), "/", path, NULL);
749 tmp = fopen(fullpath, "r");
750 if (tmp != NULL) {
751 fclose(tmp);
752 return fullpath;
753 }
754 else {
755 ckd_free(fullpath);
756 }
757 }
758 return NULL;
759}
760
762jsgf_import_rule(jsgf_t * jsgf, char *name)
763{
764 char *c, *path, *newpath;
765 size_t namelen, packlen;
766 void *val;
767 jsgf_t *imp;
768 int import_all;
769
770 /* Trim the leading and trailing <> */
771 namelen = strlen(name);
772 path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
773 strcpy(path, name + 1);
774 /* Split off the first part of the name */
775 c = strrchr(path, '.');
776 if (c == NULL) {
777 E_ERROR("Imported rule is not qualified: %s\n", name);
778 ckd_free(path);
779 return NULL;
780 }
781 packlen = c - path;
782 *c = '\0';
783
784 /* Look for import foo.* */
785 import_all = (strlen(name) > 2
786 && 0 == strcmp(name + namelen - 3, ".*>"));
787
788 /* Construct a filename. */
789 for (c = path; *c; ++c)
790 if (*c == '.')
791 *c = '/';
792 strcat(path, ".gram");
793 newpath = path_list_search(jsgf->searchpath, path);
794 if (newpath == NULL) {
795 E_ERROR("Failed to find grammar %s\n", path);
796 ckd_free(path);
797 return NULL;
798 }
799 ckd_free(path);
800
801 path = newpath;
802 E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);
803
804 /* FIXME: Also, we need to make sure that path is fully qualified
805 * here, by adding any prefixes from jsgf->name to it. */
806 /* See if we have parsed it already */
807 if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
808 E_INFO("Already imported %s\n", path);
809 imp = val;
810 ckd_free(path);
811 }
812 else {
813 /* If not, parse it. */
814 imp = jsgf_parse_file(path, jsgf);
815 val = hash_table_enter(jsgf->imports, path, imp);
816 if (val != (void *) imp) {
817 E_WARN("Multiply imported file: %s\n", path);
818 }
819 }
820 if (imp != NULL) {
821 hash_iter_t *itor;
822 /* Look for public rules matching rulename. */
823 for (itor = hash_table_iter(imp->rules); itor;
824 itor = hash_table_iter_next(itor)) {
825 hash_entry_t *he = itor->ent;
826 jsgf_rule_t *rule = hash_entry_val(he);
827 int rule_matches;
828 char *rule_name = importname2rulename(name);
829
830 if (import_all) {
831 /* Match package name (symbol table is shared) */
832 rule_matches =
833 !strncmp(rule_name, rule->name, packlen + 1);
834 }
835 else {
836 /* Exact match */
837 rule_matches = !strcmp(rule_name, rule->name);
838 }
839 ckd_free(rule_name);
840 if (rule->is_public && rule_matches) {
841 void *val;
842 char *newname;
843
844 /* Link this rule into the current namespace. */
845 c = strrchr(rule->name, '.');
846 assert(c != NULL);
847 newname = jsgf_fullname(jsgf, c);
848
849 E_INFO("Imported %s\n", newname);
850 val = hash_table_enter(jsgf->rules, newname,
851 jsgf_rule_retain(rule));
852 if (val != (void *) rule) {
853 E_WARN("Multiply defined symbol: %s\n", newname);
854 }
855 if (!import_all) {
857 return rule;
858 }
859 }
860 }
861 }
862
863 return NULL;
864}
865
866static void
867jsgf_set_search_path(jsgf_t * jsgf, const char *filename)
868{
869 char *jsgf_path;
870
871#if !defined(_WIN32_WCE)
872 if ((jsgf_path = getenv("JSGF_PATH")) != NULL) {
873 char *word, *c;
874 /* FIXME: This should be a function in libsphinxbase. */
875 word = jsgf_path = ckd_salloc(jsgf_path);
876 while ((c = strchr(word, ':'))) {
877 *c = '\0';
878 jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
879 word = c + 1;
880 }
881 jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word);
882 jsgf->searchpath = glist_reverse(jsgf->searchpath);
883 return;
884 }
885#endif
886
887 if (!filename) {
888 jsgf->searchpath =
890 return;
891 }
892
893 jsgf_path = ckd_salloc(filename);
894 path2dirname(filename, jsgf_path);
895 jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path);
896}
897
898jsgf_t *
899jsgf_parse_file(const char *filename, jsgf_t * parent)
900{
901 yyscan_t yyscanner;
902 jsgf_t *jsgf;
903 int yyrv;
904 FILE *in = NULL;
905
906 yylex_init(&yyscanner);
907 if (filename == NULL) {
908 yyset_in(stdin, yyscanner);
909 }
910 else {
911 in = fopen(filename, "r");
912 if (in == NULL) {
913 E_ERROR_SYSTEM("Failed to open %s for parsing", filename);
914 return NULL;
915 }
916 yyset_in(in, yyscanner);
917 }
918
919 jsgf = jsgf_grammar_new(parent);
920
921 if (!parent)
922 jsgf_set_search_path(jsgf, filename);
923
924 yyrv = yyparse(yyscanner, jsgf);
925 if (yyrv != 0) {
926 E_ERROR("Failed to parse JSGF grammar from '%s'\n",
927 filename ? filename : "(stdin)");
928 jsgf_grammar_free(jsgf);
929 yylex_destroy(yyscanner);
930 return NULL;
931 }
932 if (in)
933 fclose(in);
934 yylex_destroy(yyscanner);
935
936 return jsgf;
937}
938
939jsgf_t *
940jsgf_parse_string(const char *string, jsgf_t * parent)
941{
942 yyscan_t yyscanner;
943 jsgf_t *jsgf;
944 int yyrv;
945 YY_BUFFER_STATE buf;
946
947 yylex_init(&yyscanner);
948 buf = yy_scan_string(string, yyscanner);
949
950 jsgf = jsgf_grammar_new(parent);
951 if (!parent)
952 jsgf_set_search_path(jsgf, NULL);
953
954 yyrv = yyparse(yyscanner, jsgf);
955 if (yyrv != 0) {
956 E_ERROR("Failed to parse JSGF grammar from input string\n");
957 jsgf_grammar_free(jsgf);
958 yy_delete_buffer(buf, yyscanner);
959 yylex_destroy(yyscanner);
960 return NULL;
961 }
962 yy_delete_buffer(buf, yyscanner);
963 yylex_destroy(yyscanner);
964
965 return jsgf;
966}
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition: ckd_alloc.h:253
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition: ckd_alloc.h:264
Implementation of logging routines.
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define E_ERROR_SYSTEM(...)
Print error text; Call perror("");.
Definition: err.h:99
#define E_WARN(...)
Print warning message to error log.
Definition: err.h:109
File names related operation.
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off filename from the given path and copy the directory name into dir Caller must have allocate...
Definition: filename.c:68
SPHINXBASE_EXPORT glist_t glist_reverse(glist_t g)
Reverse the order of the given glist.
Definition: glist.c:169
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
Definition: glist.c:133
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition: glist.c:74
SPHINXBASE_EXPORT gnode_t * gnode_free(gnode_t *gn, gnode_t *pred)
Free the given node, gn, of a glist, pred being its predecessor in the list.
Definition: glist.c:257
#define gnode_ptr(g)
Head of a list of gnodes.
Definition: glist.h:109
Hash table implementation.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition: hash_table.c:688
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition: hash_table.c:656
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition: hash_table.c:682
SPHINXBASE_EXPORT int32 hash_table_lookup(hash_table_t *h, const char *key, void **val)
Look up a key in a hash table and optionally return the associated value.
Definition: hash_table.c:302
#define hash_entry_val(e)
Access macros.
Definition: hash_table.h:175
SPHINXBASE_EXPORT void * hash_table_enter(hash_table_t *h, const char *key, void *val)
Try to add a new entry with given key and associated value to hash table h.
Definition: hash_table.c:501
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition: hash_table.c:646
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition: hash_table.c:158
jsgf_rule_iter_t * jsgf_rule_iter(jsgf_t *grammar)
Get an iterator over all rules in a grammar.
Definition: jsgf.c:463
void jsgf_grammar_free(jsgf_t *jsgf)
Free a JSGF grammar.
Definition: jsgf.c:108
int jsgf_rule_public(jsgf_rule_t *rule)
Test if a rule is public or not.
Definition: jsgf.c:519
char const * jsgf_rule_name(jsgf_rule_t *rule)
Get the rule name from a rule.
Definition: jsgf.c:513
fsg_model_t * jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition: jsgf.c:595
jsgf_t * jsgf_grammar_new(jsgf_t *parent)
Create a new JSGF grammar.
Definition: jsgf.c:86
fsg_model_t * jsgf_read_string(const char *string, logmath_t *lmath, float32 lw)
Read JSGF from string and return FSG object from it.
Definition: jsgf.c:633
int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh)
Convert a JSGF rule to Sphinx FSG text form.
Definition: jsgf.c:666
fsg_model_t * jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, logmath_t *lmath, float32 lw)
Build a Sphinx FSG object from a JSGF rule.
Definition: jsgf.c:588
fsg_model_t * jsgf_read_file(const char *file, logmath_t *lmath, float32 lw)
Read JSGF from file and return FSG object from it.
Definition: jsgf.c:602
jsgf_rule_t * jsgf_get_public_rule(jsgf_t *grammar)
Returns the first public rule of the grammar.
Definition: jsgf.c:484
char const * jsgf_grammar_name(jsgf_t *jsgf)
Get the grammar name from the file.
Definition: jsgf.c:216
jsgf_t * jsgf_parse_string(const char *string, jsgf_t *parent)
Parse a JSGF grammar from a string.
Definition: jsgf.c:940
jsgf_rule_t * jsgf_get_rule(jsgf_t *grammar, char const *name)
Get a rule by name from a grammar.
Definition: jsgf.c:469
jsgf_t * jsgf_parse_file(const char *filename, jsgf_t *parent)
Parse a JSGF grammar from a file.
Definition: jsgf.c:899
JSGF grammar compiler.
#define jsgf_rule_iter_rule(itor)
Get the current rule in a rule iterator.
Definition: jsgf.h:127
#define jsgf_rule_iter_free(itor)
Free a rule iterator (if the end hasn't been reached).
Definition: jsgf.h:132
#define jsgf_rule_iter_next(itor)
Advance an iterator to the next rule in the grammar.
Definition: jsgf.h:122
Internal definitions for JSGF grammar compiler.
SPHINXBASE_EXPORT int logmath_free(logmath_t *lmath)
Free a log table.
Definition: logmath.c:342
SPHINXBASE_EXPORT logmath_t * logmath_init(float64 base, int shift, int use_table)
Initialize a log math computation table.
Definition: logmath.c:62
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition: logmath.c:447
Miscellaneous useful string functions.
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string.
Definition: strfuncs.c:70
Word level FSG definition.
Definition: fsg_model.h:99
int32 start_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:109
int32 final_state
Must be in the range [0..n_state-1].
Definition: fsg_model.h:110
A node in a generic list.
Definition: glist.h:100
A note by ARCHAN at 20050510: Technically what we use is so-called "hash table with buckets" which is...
Definition: hash_table.h:149
void * val
Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...
Definition: hash_table.h:155
hash_entry_t * ent
Current entry in that table.
Definition: hash_table.h:170
float weight
Weight (default 1)
char * name
Rule or token name.
glist_t atoms
Sequence of items.
jsgf_rhs_t * alt
Linked list of alternates.
char * name
Rule name (NULL for an alternation/grouping)
int is_public
Is this rule marked 'public'?
jsgf_rhs_t * rhs
Expansion.
int refcnt
Reference count.
Definition: jsgf_internal.h:99
int entry
The entry-state for this expansion.
Definition: jsgf_internal.h:95
jsgf_rule_t * rule
The rule being expanded.
Definition: jsgf_internal.h:94
char * locale
JSGF locale (default C)
Definition: jsgf_internal.h:78
glist_t rulestack
Stack of currently expanded rules.
Definition: jsgf_internal.h:89
int nstate
Number of generated states.
Definition: jsgf_internal.h:87
glist_t links
Generated FSG links.
Definition: jsgf_internal.h:88
hash_table_t * imports
Pointers to imported grammars.
Definition: jsgf_internal.h:82
glist_t searchpath
List of directories to search for grammars.
Definition: jsgf_internal.h:84
char * name
Grammar name.
Definition: jsgf_internal.h:79
char * charset
JSGF charset (default UTF-8)
Definition: jsgf_internal.h:77
char * version
JSGF version (from header)
Definition: jsgf_internal.h:76
jsgf_t * parent
Parent grammar (if this is an imported one)
Definition: jsgf_internal.h:83
hash_table_t * rules
Defined or imported rules in this grammar.
Definition: jsgf_internal.h:81