1 package DTDDoc;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.HashMap;
8 import java.util.HashSet;
9 import java.util.Iterator;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Set;
13
14 import com.wutka.dtd.DTD;
15 import com.wutka.dtd.DTDAttlist;
16 import com.wutka.dtd.DTDAttribute;
17 import com.wutka.dtd.DTDComment;
18 import com.wutka.dtd.DTDContainer;
19 import com.wutka.dtd.DTDElement;
20 import com.wutka.dtd.DTDEntity;
21 import com.wutka.dtd.DTDItem;
22 import com.wutka.dtd.DTDName;
23 import com.wutka.dtd.DTDParser;
24
/** An object of this class represents a full DTD, that is, the result of
 * interpreting a DTD file and all the other files it includes.
 * A DTD defines a bunch of elements and attributes (and a few other small
 * things as well). A DTD file can include other DTD files. Therefore some
 * pieces, although defined in some other files, are part of the
 * DTD represented by <code>this</code>. We call such pieces "external".
 *
 * One of the goals of this class is to improve over Mark Wutka's DTD class so
 * that one can know whether an element/attribute is "external" or not.
 *
 * @author Stefan Champailler
 */
37
38 public class ExtendedDTD {
39
40 private Logger log;
41 private boolean getAroundNetBeanComments;
42
43 private DefinitionsMap definitionsMap;
44
45 /** Full path to the original DTD. */
46 private File systemPath;
47
48 /** The DTD, as parsed by Wutka's DTD parser. */
49 private DTD dtd;
50
51 /** Title of the DTD. See the @title tag.*/
52 private String title = null;
53
54 /** Doctype of the DTD. See the @doctype tag.*/
55 private String doctype = null;
56
57 /** Root tag of the DTD as declared by @root tag.*/
58 private DTDElement rootElement = null;
59
60 /** Encoding of the file if specified. For example, ISO-8859-1. This is dictated
61 * by the <? ... encoding="" ?> processing instruction.
62 */
63
64 private String encoding = null;
65
66 /** Get the encoding specified in the file, if any. For example, ISO-8859-1. This is dictated
67 * by the <? ... encoding="" ?> processing instruction.
68 * @return the encoding, or <code>null</code> if no encoding was set
69 * @see #getEffectiveEncoding()
70 */
71
72 public String getEncoding() {
73 return encoding;
74 }
75
76 /** Get the effective encoding of the file: either the encoding specified in the file,
77 * or the default encoding (UTF-8) if not specified
78 * @return the effective encoding (not <code>null</code>)
79 */
80
81
82
83
84
85 public String getEffectiveEncoding() {
86 return (encoding == null) ? "UTF-8" : encoding;
87 }
88
89 /** Mapping between each element of the DTD and its parent (as long as
90 * the parent is defined in this DTD). key = String: element's name,
91 * value = (Set of DTDElement) its parent elements (most of the time, only one parent) */
92 private Map parents = null;
93
94 /** Returns the title of the DTD (see the @title tag).
95 * @return the title of the DTD. */
96 public String getTitle() {
97 return title;
98 }
99
100 /** Returns the doctype of the DTD (see the doctype tag).
101 * @return the doctype of the DTD. */
102 public String getDoctype() {
103 return doctype;
104 }
105
106 /** Return all the DTDItems of the DTD (a transparent wrapper around
107 * the same function in Wutka's library).
108 * @return The DTD Items of the DTD. */
109 public List getItems() {
110 return dtd.items;
111 }
112
113 /** Return all the DTDElements of the DTD (a transparent wrapper around
114 * the same function in Wutka's library).
115 * @return A mapping from the name of the elements to their instance. */
116 public Map getElements() {
117 return dtd.elements;
118 }
119
120 public Collection getElementsCollection() {
121 return dtd.elements.values();
122 }
123
124 public DTDElement getElementByName( String name) {
125 Object o = getElements().get( name);
126 if (o == null) {
127 log.warn("Invalid name or the element doesn't exist (" + name + ").");
128 }
129 return (DTDElement) o;
130 }
131
132 public DTDElement getElementByName( DTDName name) {
133 return getElementByName( name.getValue());
134 }
135
136 /** Return all the DTDEntities of the DTD (a transparent wrapper around
137 * the same function in Wutka's library).
138 * @return A mapping from the name of the entities to their instance. */
139 public Map getEntities() {
140 return dtd.entities;
141 }
142
143 /** Returns the root elements of the DTD, as defined by the \@root tag.
144 * @return the root element. */
145 public DTDElement getRootElement() {
146 return rootElement;
147 }
148
149 /** Setter for the root elements of the DTD.
150 * @param element The root element.*/
151 public void setRootElement(DTDElement element) {
152 rootElement = element;
153 }
154
155 /** Returns the full path to the original DTD.
156 * @return Full path to the original DTD.. */
157 public File getSystemPath() {
158 return systemPath;
159 }
160
161 /** Tells if an element is declared in a DTD outside the one represented in
162 * <code>this</code>.
163 *
164 * @param element element to be checked for externality
165 * @return true if external, false if not (or element not part of the DTD's
166 * elements. */
167
168 public boolean isExternal(DTDElement element) {
169 String elementPath = definitionsMap.getLocation(element);
170 return !elementPath.equals(getSystemPath().getPath());
171 }
172
173 /** Determine if an attributes list is defined in this DTD.
174 * @param attList attributes list to look for.
175 * @return true if the list is defined outside this DTD (therefore
176 * in another one. */
177
178 public boolean isExternal(DTDAttlist attList) {
179 return isExternal(getElementByName(attList.getName()));
180 }
181
182 /** Gives the file name of the DTD where a given element is defined.
183 *
184 * @param element element to be checked
185 * @return the file name or <code>null</code> if the element is
186 * not part of the DTD. */
187
188 public String getElementOrigin(DTDElement element) {
189 String elementPath = definitionsMap.getLocation(element);
190
191 if (elementPath == null)
192 log.warn(
193 "Requesting info about a non existing element: "
194 + element.name);
195 return elementPath;
196 }
197
198 /** Build all the information needed to make a ExtendedDTD out of
199 * a "normal" DTD. The original DTD will be read with Mark Wutka's
200 * powerful DTD parser.
201 *
202 * @param dtdFilePath Path to the original DTD. */
203
204 public ExtendedDTD(File dtdFilePath, Logger log, boolean
205 getAroundNetBeanComments) throws IOException {
206
207 this.log = log;
208 this.getAroundNetBeanComments = getAroundNetBeanComments;
209 this.systemPath = dtdFilePath;
210
211 if (!getSystemPath().canRead())
212 throw new IOException(
213 "Can't read " + getSystemPath()
214 + ". Be prepared to get tons of errors !");
215
216 DTDParser dtdParser = new DTDParser(getSystemPath());
217 dtd = dtdParser.parse(true);
218 encoding = dtdParser.getDTDEncoding();
219
220 definitionsMap = locateElements(getSystemPath().getPath(), dtd);
221
222 findDeclaredRootElement();
223 attributesListMap = makeAttributesListsMap(dtd.items);
224 parents = findParents(dtd);
225 }
226
227 /** For a given DTD, this function will build a map
228 * associating each of its elements (included those defined in
229 * its children) to the system-name of the DTDs in which those elements
230 * are defined. This function is necessary because Wutka's code
231 * doesn't do that correctly.
232 *
233 * This function is VERY INEFFICIENT !
234 *
235 * @param filePath where the DTD file is located.
236 */
237
238 private DefinitionsMap locateElements(String filePath, DTD dtd) throws IOException {
239
240
241
242
243 if (dtd == null) {
244 File f = new File(filePath);
245
246 try {
247 DTDParser dtdParser = new DTDParser(f);
248 dtd = dtdParser.parse(true);
249 } catch (Exception ex) {
250 log.warn("locateElements():can't read " + filePath);
251 return null;
252 }
253 }
254
255
256
257 DefinitionsMap defMap = new DefinitionsMap();
258
259
260
261
262 Iterator items = dtd.entities.values().iterator();
263 while (items.hasNext()) {
264
265 DTDEntity entity = (DTDEntity) items.next();
266
267
268
269
270 String nextFile = null;
271
272 if (entity.getExternalID() != null)
273 nextFile = entity.getExternalID().getSystem();
274
275
276
277
278 if (nextFile != null
279 && !(nextFile.startsWith("http:")
280 || nextFile.startsWith("file:"))) {
281
282
283
284
285
286
287 nextFile =
288 filePath.substring(
289 0,
290 filePath.lastIndexOf(File.separatorChar) + 1)
291 + nextFile;
292
293
294 defMap.merge(locateElements(nextFile, null));
295 }
296 }
297
298
299
300
301
302
303
304 items = dtd.elements.values().iterator();
305 while (items.hasNext()) {
306 DTDElement element = (DTDElement) items.next();
307 defMap.add(element, filePath);
308 }
309
310 return defMap;
311 }
312
313 /** Tag starting a root element defintion (@root) */
314 private static final String ROOT_TAG = DTDCommenter.ROOT_TAG;
315
316 /** Tag starting a DTD title defintion (@title) */
317 private static final String TITLE_TAG = DTDCommenter.TITLE_TAG;
318
319 /** Tag starting a DTD doctype defintion (@doctype) */
320 private static final String DOCTYPE_TAG = DTDCommenter.DOCTYPE_TAG;
321
322 /** This function will locate the "@root" element in the comments
323 * in a given DTD.
324 * With that information, it will determine the root of the
325 * DTD and update the necessary member (i.e. rootElement).
326 *
327 * @return the value of the first "@root" doc-tag found.
328 */
329
330 private String findRootTagValue() {
331
332 String rootName = null;
333 Iterator elements = getItems().iterator();
334
335 while (elements.hasNext()) {
336
337 Object obj = elements.next();
338
339
340 if (obj instanceof DTDComment) {
341
342 DTDComment comment = (DTDComment) obj;
343
344
345
346
347
348
349 CommentParser cp = new CommentParser( comment, log,
350 getAroundNetBeanComments);
351 String rn = cp.getUniqueTagValue(ROOT_TAG);
352
353
354
355
356
357 if (rn != null) {
358 DTDElement rnElmt = getElementByName(rn);
359 if ((rnElmt != null) && (!isExternal(rnElmt))) {
360 rootName = rn;
361 } else
362 log.warn(
363 "The root element you specified with \""
364 + ROOT_TAG + ' ' + rn
365 + "\" doesn't exist in the DTD !");
366 }
367
368
369 String t = cp.getUniqueTagValue(TITLE_TAG);
370 if (t != null) {
371 title = t;
372 }
373
374 t = cp.getUniqueTagValue(DOCTYPE_TAG);
375 if (t != null) {
376 doctype = t;
377 }
378 } else if ((obj instanceof DTDElement) || (obj instanceof DTDEntity)) {
379
380
381 break;
382 }
383 }
384
385 if (title == null) {
386 title = Tools.getFilename(systemPath.getName());
387 }
388
389 return rootName;
390 }
391
392 /** This function will set the rootElement member in the dtd
393 * according to the value to be found in the "@root" doc-tag. */
394
395 private void findDeclaredRootElement() {
396
397 String rootName = findRootTagValue();
398
399
400 if (rootName != null) {
401 DTDElement rootNameElement = getElementByName(rootName);
402
403
404 if (rootNameElement != null) {
405
406 setRootElement(rootNameElement);
407 } else
408
409 log.warn(
410 "The provided root element ("
411 + ROOT_TAG + " " + rootName
412 + ") is nowhere to be found in the DTD!");
413
414 }
415 }
416
417 /** Gives a unique id for an attribute in a DTD. The id construction
418 * relies on the fact that an attribute is always related to a element.
419 * That is each (element, attribute) pair is unique in a DTD.
420 *
421 * @param attList The attributes list the attibute is part of.
422 * @param attribute The attribute for which we seek the id.
423 * @return The id of the attribute.
424 */
425
426 public static String getUniqueId( DTDAttlist attList, DTDAttribute attribute) {
427
428 return attList.name + '_' + attribute.getName();
429 }
430
431 /** Gives a unique id for an attribute in a DTD.
432 *
433 * @param element The element to which the attribute belongs.
434 * @param attribute The attribute to get the id for.
435 * @return The id of the attribute.
436 * @see #getUniqueId( DTDAttlist attList, DTDAttribute attribute)
437 */
438 public static String getUniqueId( DTDElement element, DTDAttribute attribute) {
439 return element.name + '_' + attribute.getName();
440 }
441
442 /** Gives a unique id for an element of a DTD.
443 *
444 * @param element The element.
445 * @return The id of the element.
446 */
447
448 public static String getUniqueId( DTDElement element) {
449 return element.getName();
450 }
451
452 /**
453 * key = DTDAttribute , value = DTDAttList
454 * @see #makeAttributesListsMap
455 */
456 private final Map attributesListMap;
457
458 public DTDAttlist locateAttributesList(DTDAttribute attribute) {
459 return (DTDAttlist) attributesListMap.get(attribute);
460 }
461
462 private static Map makeAttributesListsMap(List dtdItems) {
463
464 Map hash = new HashMap();
465
466 for (Iterator iter = dtdItems.iterator(); iter.hasNext(); ) {
467 Object item = iter.next();
468
469 if (item instanceof DTDAttlist) {
470 DTDAttlist attList = (DTDAttlist) item;
471
472 for (Iterator iter2 = attList.attributes.iterator(); iter2.hasNext(); )
473 hash.put(iter2.next(), attList);
474 }
475 }
476
477 return hash;
478 }
479
480 /** Builds an map associating each descendant of an element to its parent
481 * (the element itself :)). All the elements of the given DTD are
482 * examined.
483 *
484 * @param dtd A DTD where to look for the elements
485 * @return A map associating the names (string) of each element
486 * to a Set of its parents (most of the time, only one parent). */
487
488 private static Map findParents(DTD dtd) {
489 Map parents = new HashMap();
490
491 Iterator i = dtd.elements.values().iterator();
492
493
494 while(i.hasNext()) {
495
496 DTDElement element = (DTDElement) i.next();
497
498
499
500 String k = element.getName();
501
502 if( !parents.containsKey( k))
503 parents.put( k, null);
504
505
506
507
508
509 Set children = collectDeclaredChildren( element);
510
511 if( children != null) {
512 for ( Iterator j = children.iterator(); j.hasNext();) {
513 Object o = j.next();
514
515 if (o instanceof DTDName) {
516
517 String cName = ((DTDName) o).getValue();
518
519
520
521
522
523 if( !element.getName().equals( cName)) {
524 Set parentElements = (Set) parents.get(cName);
525
526 if (parentElements == null)
527 parentElements = new HashSet();
528
529 parentElements.add(element);
530
531 parents.put(cName, parentElements);
532 }
533
534 }
535 }
536 }
537 }
538
539 return parents;
540 }
541
542 /** Gets the parents of an element.
543 *
544 * @param sonName The name of the element to look for.
545 * @return Null if the element doesn't exist or it has no parent.
546 */
547
548 public Set getParents(String sonName) {
549 return (Set) parents.get(sonName);
550 }
551
552 /** Check if an element is root, that is, if it has no parent.
553 *
554 * @param element The element to check.
555 * @return True if its is root, false else.
556 */
557
558 public boolean isRoot( DTDElement element) {
559 Set parents = getParents( element.getName());
560 return parents == null || parents.size() == 0;
561 }
562
563 /** Collects the elements that appear in the definition of
564 * a given element (its "children").
565 *
566 * <p>Special definitions such as ANY, EMPTY and PCDATA are ignored.</p>
567 *
568 * @param element The element.
569 * @return The set of children or null if there ain't any child.
570 */
571
572 public static Set collectDeclaredChildren( DTDElement element) {
573 return collectDeclaredChildrenHelper( element.getContent());
574 }
575
576 private static Set collectDeclaredChildrenHelper( DTDItem item) {
577
578 if (item instanceof DTDContainer) {
579
580 Iterator items = ((DTDContainer) item).getItemsVec().iterator();
581
582 Set children = null;
583
584 while (items.hasNext()) {
585 Set s = collectDeclaredChildrenHelper( (DTDItem)items.next());
586 if (s != null) {
587 if( children == null) children = new HashSet();
588 children.addAll(s);
589 }
590 }
591
592 return children;
593
594 } else if (item instanceof DTDName)
595
596 return Collections.singleton( item);
597
598 else
599
600
601 return null;
602 }
603
604 }