View Javadoc

1   package DTDDoc;
2   
3   import java.io.File;
4   import java.io.IOException;
5   import java.util.Collection;
6   import java.util.Collections;
7   import java.util.HashMap;
8   import java.util.HashSet;
9   import java.util.Iterator;
10  import java.util.List;
11  import java.util.Map;
12  import java.util.Set;
13  
14  import com.wutka.dtd.DTD;
15  import com.wutka.dtd.DTDAttlist;
16  import com.wutka.dtd.DTDAttribute;
17  import com.wutka.dtd.DTDComment;
18  import com.wutka.dtd.DTDContainer;
19  import com.wutka.dtd.DTDElement;
20  import com.wutka.dtd.DTDEntity;
21  import com.wutka.dtd.DTDItem;
22  import com.wutka.dtd.DTDName;
23  import com.wutka.dtd.DTDParser;
24  
25  /** An object of this class represents a full DTD. That is, the result of
26   * interpretation of a DTD file and all the other files it includes.
27   * A DTD defines a bunch of elements and attributes (and small other things
28   * as well). A DTD file can include other DTD files. Therefore some
29   * pieces, although defined in some other files, are defined in the
30   * DTD represented by <code>this</code>. We call such pieces "external".
31   *
32   * One of the goals of this class is to improve over Mark Wutka's DTD class so
33   * that one can know if an element/attribute is "external" or not.
34   *
35   * @author Stefan Champailler
36   */
37  
38  public class ExtendedDTD {
39  
40      private Logger log;
41      private boolean getAroundNetBeanComments;
42  
43      private DefinitionsMap definitionsMap;
44  
45      /** Full path to the original DTD. */
46      private File systemPath;
47  
48      /** The DTD, as parsed by Wutka's DTD parser. */
49      private DTD dtd;
50  
51      /** Title of the DTD. See the @title tag.*/
52      private String title = null;
53  
54      /** Doctype of the DTD. See the @doctype tag.*/
55      private String doctype = null;
56  
57      /** Root tag of the DTD as declared by @root tag.*/
58      private DTDElement rootElement = null;
59  
60      /** Encoding of the file if specified. For example, ISO-8859-1. This is dictated
61       *  by the &lt;? ... encoding="" ?&gt; processing instruction.
62       */
63  
64      private String encoding = null;
65  
66      /** Get the encoding specified in the file, if any. For example, ISO-8859-1. This is dictated
67       *  by the &lt;? ... encoding="" ?&gt; processing instruction.
68       *  @return the encoding, or <code>null</code> if no encoding was set
69       *  @see #getEffectiveEncoding()
70       */
71  
72      public String getEncoding() {
73          return encoding;
74      }
75  
76      /** Get the effective encoding of the file: either the encoding specified in the file,
77       * or the default encoding (UTF-8) if not specified
78       *  @return the effective encoding (not <code>null</code>)
79       */
80  
81      // FIXME Is UTF-8 the default encoding when nothing is specified in the DTD
82      // ? Is it in the DTD spec ? Anyway, we should put a reference to the spec
83      // here so I don't ask that myself again.
84  
85      public String getEffectiveEncoding() {
86          return (encoding == null) ? "UTF-8" : encoding;
87      }
88  
89      /** Mapping between each element of the DTD and its parent (as long as
90       *  the parent is defined in this DTD). key = String: element's name,
91       *  value = (Set of DTDElement) its parent elements (most of the time, only one parent) */
92      private Map parents = null;
93  
94      /** Returns the title of the DTD (see the @title tag).
95       *  @return the title of the DTD. */
96      public String getTitle() {
97          return title;
98      }
99  
100     /** Returns the doctype of the DTD (see the doctype tag).
101      *  @return the doctype of the DTD. */
102     public String getDoctype() {
103         return doctype;
104     }
105 
106     /** Return all the DTDItems of the DTD (a transparent wrapper around
107      *  the same function in Wutka's library).
108      * @return The DTD Items of the DTD. */
109     public List getItems() {
110         return dtd.items;
111     }
112 
113     /** Return all the DTDElements of the DTD (a transparent wrapper around
114      *  the same function in Wutka's library).
115      * @return A mapping from the name of the elements to their instance. */
116     public Map getElements() {
117         return dtd.elements;
118     }
119 
120     public Collection getElementsCollection() {
121         return dtd.elements.values();
122     }
123 
124     public DTDElement getElementByName( String name) {
125         Object o = getElements().get( name);
126         if (o == null) {
127             log.warn("Invalid name or the element doesn't exist (" + name + ").");
128         }
129         return (DTDElement) o;
130     }
131 
132     public DTDElement getElementByName( DTDName name) {
133         return getElementByName( name.getValue());
134     }
135 
136     /** Return all the DTDEntities of the DTD (a transparent wrapper around
137      *  the same function in Wutka's library).
138      * @return A mapping from the name of the entities to their instance. */
139     public Map getEntities() {
140         return dtd.entities;
141     }
142 
143     /** Returns the root elements of the DTD, as defined by the \@root tag.
144       * @return the root element. */
145     public DTDElement getRootElement() {
146         return rootElement;
147     }
148 
149     /** Setter for the root elements of the DTD.
150       * @param element The root element.*/
151     public void setRootElement(DTDElement element) {
152         rootElement = element;
153     }
154 
155     /** Returns the full path to the original DTD.
156      *  @return Full path to the original DTD.. */
157     public File getSystemPath() {
158         return systemPath;
159     }
160 
161     /** Tells if an element is declared in a DTD outside the one represented in
162      * <code>this</code>.
163      *
164      * @param element element to be checked for externality
165      * @return true if external, false if not (or element not part of the DTD's
166      *     elements. */
167 
168     public boolean isExternal(DTDElement element) {
169         String elementPath = definitionsMap.getLocation(element);
170         return !elementPath.equals(getSystemPath().getPath());
171     }
172 
173     /** Determine if an attributes list is defined in this DTD.
174      *  @param attList attributes list to look for.
175      *  @return true if the list is defined outside this DTD (therefore
176      *      in another one. */
177 
178     public boolean isExternal(DTDAttlist attList) {
179         return isExternal(getElementByName(attList.getName()));
180     }
181 
182     /** Gives the file name of the DTD where a given element is defined.
183      *
184      * @param element element to be checked
185      * @return the file name or <code>null</code> if the element is
186      *     not part of the DTD. */
187 
188     public String getElementOrigin(DTDElement element) {
189         String elementPath = definitionsMap.getLocation(element);
190 
191         if (elementPath == null)
192             log.warn(
193                 "Requesting info about a non existing element: "
194                     + element.name);
195         return elementPath;
196     }
197 
198     /** Build all the information needed to make a ExtendedDTD out of
199      *  a "normal" DTD. The original DTD will be read with Mark Wutka's
200      *  powerful DTD parser.
201      *
202      *  @param dtdFilePath Path to the original DTD. */
203 
204     public ExtendedDTD(File dtdFilePath, Logger log, boolean
205         getAroundNetBeanComments) throws IOException {
206 
207         this.log = log;
208         this.getAroundNetBeanComments = getAroundNetBeanComments;
209         this.systemPath = dtdFilePath;
210 
211         if (!getSystemPath().canRead())
212             throw new IOException(
213                 "Can't read " + getSystemPath()
214                     + ". Be prepared to get tons of errors !");
215 
216         DTDParser dtdParser = new DTDParser(getSystemPath());
217         dtd = dtdParser.parse(true);
218         encoding = dtdParser.getDTDEncoding();
219 
220         definitionsMap = locateElements(getSystemPath().getPath(), dtd);
221 
222         findDeclaredRootElement();
223         attributesListMap = makeAttributesListsMap(dtd.items);
224         parents = findParents(dtd);
225     }
226 
227     /** For a given DTD, this function will build a map
228      *  associating each of its elements (included those defined in
229      *  its children) to the system-name of the DTDs in which those elements
230      *  are defined. This function is necessary because Wutka's code
231      *  doesn't do that correctly.
232      *
233      *  This function is VERY INEFFICIENT !
234      *
235      *  @param filePath where the DTD file is located.
236      */
237 
238     private DefinitionsMap locateElements(String filePath, DTD dtd) throws IOException {
239 
240         // Read the DTD
241         // Very inefficient stuff here ...
242 
243         if (dtd == null) {
244             File f = new File(filePath);
245 
246             try {
247                 DTDParser dtdParser = new DTDParser(f);
248                 dtd = dtdParser.parse(true);
249             } catch (Exception ex) {
250                 log.warn("locateElements():can't read " + filePath);
251                 return null;
252             }
253         }
254 
255         // Analysing...
256 
257         DefinitionsMap defMap = new DefinitionsMap();
258 
259         // First, collect the position of elements defined in children DTDs.
260         // Children DTD are enumerated in the entities.
261 
262         Iterator items = dtd.entities.values().iterator();
263         while (items.hasNext()) {
264 
265             DTDEntity entity = (DTDEntity) items.next();
266 
267             // We're only interested in the *external DTD files*. Therefore
268             // we have the following tests.
269 
270             String nextFile = null;
271 
272             if (entity.getExternalID() != null)
273                 nextFile = entity.getExternalID().getSystem();
274 
275             // We work only with files that are on the current filesystem
276             // FIXME: break this limitation please !
277 
278             if (nextFile != null
279                 && !(nextFile.startsWith("http:")
280                     || nextFile.startsWith("file:"))) {
281 
282                 // FIXME: We're not sure if this is gonna work for files
283                 // that are placed in different directories. This depends
284                 // on the nature of externalID.system (which we don't know
285                 // for sure yet).
286 
287                 nextFile =
288                     filePath.substring(
289                         0,
290                         filePath.lastIndexOf(File.separatorChar) + 1)
291                         + nextFile;
292 
293                 // log.debug( "///// diving in "+nextFile);
294                 defMap.merge(locateElements(nextFile, null));
295             }
296         }
297 
298         // Now that we know which elements are defined in the children,
299         // we can find out those which are defined in the present DTD.
300         // The idea is simple, an element is defined in this DTD (and not
301         // in its children) if and only if it's not defined in its
302         // children. Sometimes we work on obvious things...
303 
304         items = dtd.elements.values().iterator();
305         while (items.hasNext()) {
306             DTDElement element = (DTDElement) items.next();
307             defMap.add(element, filePath);
308         }
309 
310         return defMap;
311     }
312 
313     /** Tag starting a root element definition (@root) */
314     private static final String ROOT_TAG = DTDCommenter.ROOT_TAG;
315 
316     /** Tag starting a DTD title definition (@title) */
317     private static final String TITLE_TAG = DTDCommenter.TITLE_TAG;
318 
319     /** Tag starting a DTD doctype definition (@doctype) */
320     private static final String DOCTYPE_TAG = DTDCommenter.DOCTYPE_TAG;
321 
322     /** This function will locate the "@root" element in the comments
323       * in a given DTD.
324       * With that information, it will determine the root of the
325       * DTD and update the necessary member (i.e. rootElement).
326       *
327       * @return the value of the first "@root" doc-tag found.
328       */
329 
330     private String findRootTagValue() {
331 
332         String rootName = null;
333         Iterator elements = getItems().iterator();
334 
335         while (elements.hasNext()) {
336 
337             Object obj = elements.next();
338 
339             // Are we on a comment ?
340             if (obj instanceof DTDComment) {
341 
342                 DTDComment comment = (DTDComment) obj;
343 
344                 // Since all the comments from the included files also
345                 // appear in the DTD, we have to make sure we only look
346                 // at the relevant one.
347 
348                 // Then, we parse it...
349                 CommentParser cp = new CommentParser( comment, log,
350                     getAroundNetBeanComments);
351                 String rn = cp.getUniqueTagValue(ROOT_TAG);
352 
353                 // So, if we found the @root tag, it's cool but we have to
354                 // make sure that this one is related to the DTD and not one
355                 // of its children.
356 
357                 if (rn != null) {
358                     DTDElement rnElmt = getElementByName(rn);
359                     if ((rnElmt != null) && (!isExternal(rnElmt))) {
360                         rootName = rn;
361                     } else
362                         log.warn(
363                             "The root element you specified with \""
364                                 + ROOT_TAG + ' ' + rn
365                                 + "\" doesn't exist in the DTD !");
366                 }
367 
368                 // Now we figure out the title
369                 String t = cp.getUniqueTagValue(TITLE_TAG);
370                 if (t != null) {
371                     title = t;
372                 }
373 
374                 t = cp.getUniqueTagValue(DOCTYPE_TAG);
375                 if (t != null) {
376                     doctype = t;
377                 }
378             } else if ((obj instanceof DTDElement) || (obj instanceof DTDEntity)) {
379                 // root tag, title and doctype definitions are valid in first comment only
380                 // then we stop when we find the first element or entoty definition (= heuristics)
381                 break;
382             }
383         }
384 
385         if (title == null) {
386             title = Tools.getFilename(systemPath.getName());
387         }
388 
389         return rootName;
390     }
391 
392     /** This function will set the rootElement member in the dtd
393       * according to the value to be found in the "@root" doc-tag. */
394 
395     private void findDeclaredRootElement() {
396 
397         String rootName = findRootTagValue();
398 
399         // Did we find the @root doc-tag.
400         if (rootName != null) {
401             DTDElement rootNameElement = getElementByName(rootName);
402 
403             // Yes, make sure that this root really exists
404             if (rootNameElement != null) {
405                 // It does ! Now we can update the DTD.
406                 setRootElement(rootNameElement);
407             } else
408                 // It doesn't. Therefore...
409                 log.warn(
410                     "The provided root element ("
411                         + ROOT_TAG + " " + rootName
412                         + ") is nowhere to be found in the DTD!");
413 
414         }
415     }
416 
417     /** Gives a unique id for an attribute in a DTD. The id construction
418      *  relies on the fact that an attribute is always related to a element.
419      *  That is each (element, attribute) pair is unique in a DTD.
420      *
421      * @param attList The attributes list the attibute is part of.
422      * @param attribute The attribute for which we seek the id.
423      * @return The id of the attribute.
424      */
425 
426     public static String getUniqueId( DTDAttlist attList, DTDAttribute attribute) {
427         // attList.name is the element's name.
428         return attList.name + '_' + attribute.getName();
429     }
430 
431     /** Gives a unique id for an attribute in a DTD.
432      *
433      * @param element The element to which the attribute belongs.
434      * @param attribute The attribute to get the id for.
435      * @return The id of the attribute.
436      * @see #getUniqueId( DTDAttlist attList, DTDAttribute attribute)
437      */
438     public static String getUniqueId( DTDElement element, DTDAttribute attribute) {
439         return element.name + '_' + attribute.getName();
440     }
441 
442     /** Gives a unique id for an element of a DTD.
443      *
444      * @param element The element.
445      * @return The id of the element.
446      */
447 
448     public static String getUniqueId( DTDElement element) {
449         return element.getName();
450     }
451 
452     /**
453      * key = DTDAttribute , value = DTDAttList
454      * @see #makeAttributesListsMap
455      */
456     private final Map attributesListMap;
457 
458     public DTDAttlist locateAttributesList(DTDAttribute attribute) {
459         return (DTDAttlist) attributesListMap.get(attribute);
460     }
461 
462     private static Map makeAttributesListsMap(List dtdItems) {
463 
464         Map hash = new HashMap();
465 
466         for (Iterator iter = dtdItems.iterator(); iter.hasNext(); ) {
467                 Object item = iter.next();
468 
469             if (item instanceof DTDAttlist) {
470                 DTDAttlist attList = (DTDAttlist) item;
471 
472                 for (Iterator iter2 = attList.attributes.iterator(); iter2.hasNext(); )
473                     hash.put(iter2.next(), attList);
474             }
475         }
476 
477         return hash;
478     }
479 
480     /** Builds an map associating each descendant of an element to its parent
481      *  (the element itself :)). All the elements of the given DTD are
482      *  examined.
483      *
484      *  @param dtd A DTD where to look for the elements
485      *  @return A map associating the names (string) of each element
486      *      to a Set of its parents (most of the time, only one parent). */
487 
488     private static Map findParents(DTD dtd) {
489         Map parents = new HashMap();
490 
491         Iterator i = dtd.elements.values().iterator();
492 
493         // We scan all the elements of the DTD
494         while(i.hasNext()) {
495 
496             DTDElement element = (DTDElement) i.next();
497 
498             // We make sure that each element appears in the map.
499 
500             String k = element.getName();
501 
502             if( !parents.containsKey( k))
503                 parents.put( k, null);
504 
505             // Now we set each element's child parent to the element.
506 
507             // Not all elements have children (for example, an
508             // element can be EMPTY)
509             Set children = collectDeclaredChildren( element);
510 
511             if( children != null) {
512                 for ( Iterator j = children.iterator(); j.hasNext();) {
513                     Object o = j.next();
514 
515                     if (o instanceof DTDName) {
516 
517                         String cName = ((DTDName) o).getValue();
518 
519                         // Each child of e has e as parent.
520                         // Except that an element can't be his
521                         // own parent.
522 
523                         if( !element.getName().equals( cName)) {
524                             Set parentElements = (Set) parents.get(cName);
525 
526                             if (parentElements == null)
527                                 parentElements = new HashSet();
528 
529                             parentElements.add(element);
530 
531                             parents.put(cName, parentElements);
532                         }
533                         // log.debug(children[j].toString()+"("+ ((DTDName)children[j]).getValue() +")" + " --> "+ e+ "("+((DTDElement)e).getName()+")");
534                     }
535                 }
536             }
537         }
538 
539         return parents;
540     }
541 
542     /** Gets the parents of an element.
543      *
544      * @param sonName The name of the element to look for.
545      * @return Null if the element doesn't exist or it has no parent.
546      */
547 
548     public Set getParents(String sonName) {
549         return (Set) parents.get(sonName);
550     }
551 
552     /** Check if an element is root, that is, if it has no parent.
553      *
554      * @param element The element to check.
555      * @return True if its is root, false else.
556      */
557 
558     public boolean isRoot( DTDElement element) {
559         Set parents = getParents( element.getName());
560         return parents == null || parents.size() == 0;
561     }
562 
563     /** Collects the elements that appear in the definition of
564      * a given element (its "children").
565      *
566      * <p>Special definitions such as ANY, EMPTY and PCDATA are ignored.</p>
567      *
568      * @param element The element.
569      * @return The set of children or null if there ain't any child.
570      */
571 
572     public static Set collectDeclaredChildren( DTDElement element) {
573         return collectDeclaredChildrenHelper( element.getContent());
574     }
575 
576     private static Set collectDeclaredChildrenHelper( DTDItem item) {
577 
578         if (item instanceof DTDContainer) {
579 
580             Iterator items = ((DTDContainer) item).getItemsVec().iterator();
581 
582             Set children = null;
583 
584             while (items.hasNext()) {
585                 Set s = collectDeclaredChildrenHelper( (DTDItem)items.next());
586                 if (s != null) {
587                     if( children == null) children = new HashSet();
588                     children.addAll(s);
589                 }
590             }
591 
592             return children;
593 
594         } else if (item instanceof DTDName)
595 
596             return Collections.singleton( item);
597 
598         else
599             // ANY, PCDATA and EMPTY are not considered as children, they
600             // are not named.
601             return null;
602     }
603 
604 }