(:~
: *****************************************************************
:
: This XQuery script converts the GermaNet-specific XML version of
: GermaNet to Wordnet-LMF (unmodified version). It resulted from
: our work on
:
: "Standardizing Wordnets in the ISO Standard LMF:
: Wordnet-LMF for GermaNet"
:
: The purpose of this work is to show how GermaNet, the German
: version of the Princeton WordNet, can be converted to the Lexical
: Markup Framework (LMF), a published ISO standard (ISO-24613) for
: encoding lexical resources. The conversion builds on Wordnet-LMF,
: which has been proposed in the context of the EU KYOTO project as
: an LMF format for wordnets. In our paper (see reference below),
: we propose a number of crucial modifications and a set of
: extensions to Wordnet-LMF that are needed for conversion of
: wordnets in general and for conversion of GermaNet in particular.
:
: The present XQuery script converts GermaNet to the unmodified
: version of Wordnet-LMF. Note that there is another XQuery script
: available that converts GermaNet to the modified version of
: Wordnet-LMF.
:
: *****************************************************************
:
: Reference:
: Verena Henrich and Erhard Hinrichs: Standardizing Wordnets in the
: ISO Standard LMF: Wordnet-LMF for GermaNet. In Proceedings of the
: 23rd International Conference on Computational Linguistics
: (COLING 2010), Beijing, China, August 2010.
:
: Download paper: http://www.aclweb.org/anthology/C10-1052
: Citation: http://www.aclweb.org/anthology-new/C/C10/C10-1052.bib
:
: *****************************************************************
:
: @author Verena Henrich
: @version 1.0
: @see http://www.aclweb.org/anthology/C10-1052
:)
(: Prolog: namespace binding, serialization options and global variables
 : shared by the functions and the query body of this script.
 : NOTE(review): the header describes this script as producing the
 : unmodified Wordnet-LMF, while the doctype below is named
 : kyoto_wn_revised.dtd - confirm this is the intended DTD.
 : NOTE(review): the collection path is an absolute, machine-specific
 : location and must be adjusted before running elsewhere.
 :)
declare namespace germanet = "http://www.sfs.uni-tuebingen.de/GermaNet"; (: Namespace declaration; prefix used for the functions declared in this script. :)
(: Serialization options: emit a DOCTYPE with the given system identifier and indent the output. :)
declare option saxon:output "doctype-system=kyoto_wn_revised.dtd";
declare option saxon:output "indent=yes";
declare variable $germanetVersion := "5.3"; (: The current version of GermaNet; the ID functions embed it with the dot removed. :)
declare variable $germanet := collection("/home/vhenrich/GermaNet/GN_V53/"); (: The path to the GermaNet data; the query reads synsets and relations from this collection. :)
(:~
: Extracts the leading character of a string.
:
: @param $string the text whose leading character is wanted
: @return a string holding the first character of $string, or the
: zero-length string when $string itself is zero-length
:)
declare function germanet:getFirstCharacter($string as xs:string) as xs:string {
  let $leading := fn:substring($string, 1, 1)
  return $leading
};
(:~
: Builds the Wordnet-LMF Synset ID for a GermaNet synset.
:
: The ID has the shape deu-VERSION-ID-C, where VERSION is
: $germanetVersion with its dots removed, ID is the value of the
: synset's id attribute and C is the first character of the
: synset's category attribute.
:
: @param $synset the synset element from GermaNet XML; its id and
: category attributes are read
: @return the newly assembled Synset ID
:)
declare function germanet:getSynsetId($synset as element()) as xs:string {
  (: Dropping every "." turns e.g. a dotted version into plain digits. :)
  let $versionDigits := fn:translate($germanetVersion, ".", "")
  let $categoryInitial := germanet:getFirstCharacter($synset/@category)
  return
    fn:string-join(
      ("deu", $versionDigits, fn:string($synset/@id), $categoryInitial),
      "-"
    )
};
(:~
: Builds the Wordnet-LMF LexicalEntry ID for a numbered entry.
:
: The ID has the shape deu-VERSION-LEn-C, where VERSION is
: $germanetVersion with its dots removed, n is $lexEntryNum and C
: is the first character of $category.
:
: @param $lexEntryNum the number of the LexicalEntry, embedded
: after the "LE" marker
: @param $category the word category; only its first character
: ends up in the ID
: @return the newly assembled LexicalEntry ID
:)
declare function germanet:getLexicalEntryId($lexEntryNum as xs:integer, $category as xs:string) as xs:string {
  let $versionDigits := fn:translate($germanetVersion, ".", "")
  let $entryTag := fn:concat("LE", fn:string($lexEntryNum))
  let $categoryInitial := germanet:getFirstCharacter($category)
  return
    fn:string-join(("deu", $versionDigits, $entryTag, $categoryInitial), "-")
};
(:~
: Translates a GermaNet XML relation name into the relation name
: used in Wordnet-LMF.
:
: The translation is a fixed table lookup; a name that is not in
: the table yields the placeholder "NA".
:
: @param $rel the name of the relation in GermaNet XML
: @return the matching Wordnet-LMF relation name, or "NA"
:)
declare function germanet:getNewRelName($rel as xs:string) as xs:string {
  (: The two sequences are parallel: entry i of the first maps to
   : entry i of the second. The first six are conceptual relations,
   : the last three are lexical relations.
   :)
  let $germanetNames := (
    "hyperonymy", "meronymy", "association",
    "holonymy", "entailment", "causation",
    "antonymy", "pertonymy", "participle"
  )
  let $lmfNames := (
    "has_hyperonym", "has_meronym", "related_to",
    "has_holonym", "is_subevent_of", "causes",
    "antonym", "pertains_to", "is_derived_from"
  )
  let $hit := fn:index-of($germanetNames, $rel)[1]
  return
    if (fn:exists($hit))
    then $lmfNames[$hit]
    else "NA"
};
(:~
: This function converts example elements from GermaNet XML to
: the corresponding Statement elements in Wordnet-LMF with
: their example attributes. It iterates over $examples one
: element at a time.
:
: NOTE(review): as seen here the return clause carries no
: expression; presumably the element constructor that builds the
: output for each example was lost - restore it before running.
:
: @param $examples example elements from GermaNet XML (may be empty)
: @return the converted elements, zero or more (the summary above
: calls them Statement elements, an earlier note called them
: SenseExample elements - TODO confirm which name is correct)
:)
declare function germanet:getExamplesForSynset($examples as element()*) as element()* {
for $example in $examples
return
};
(::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
: Start of the query body.
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::)
{
(::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
: The following loop constructs all LexicalEntry and Sense
: elements in Wordnet-LMF. To do so, it iterates through all word
: categories and then through all lemmas that were extracted from
: the GermaNet XML.
:
: NOTE(review): several brace groups and return clauses below hold
: no expression in this view; presumably the element constructors
: were lost - confirm against the complete script.
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::)
(: Iterate through all word categories. :)
for $category in ("adj", "nomen", "verben")
return
(: Collect the distinct orthForm values found in the synsets of this
 : category and sort them.
 :)
let $lemmas := for $lemma in distinct-values($germanet/synsets/synset[@category = $category]/lexUnit/orthForm)
order by $lemma
return $lemma
(: Iterate through all lemmas of a word category; $lexEntryNum is the
 : position of the lemma within the sorted list.
 :)
for $lemma at $lexEntryNum in $lemmas
return
{
(: Iterate through all synsets of the category that contain a lexical
 : unit whose orthForm equals the lemma; $sense is the position of the
 : synset among these matches.
 :)
for $synset at $sense in $germanet/synsets/synset[@category = $category and lexUnit/orthForm = $lemma]
return
{
(: Encode a reference to the GermaNet database table with the
: corresponding database ID in a MonolingualExternalRef element.
:)
}
}
}
{
(::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
: The following loop constructs all Synset elements in Wordnet-LMF.
: To do so, it iterates through all synset elements in GermaNet
: XML.
:
: NOTE(review): several then/return clauses and brace groups below
: hold no expression in this view; presumably the element
: constructors were lost - confirm against the complete script.
::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::)
(: Iterate through all synsets. For each one, $examples holds every
 : example element below the synset and $conRels holds the con_rel
 : elements whose from attribute equals the synset id.
 :)
for $synset in $germanet/synsets/synset
let $examples := $synset//example
let $conRels := $germanet/relations/con_rel[@from = $synset/@id]
return
{
(: Write a Definition in case a sense has a describing paraphrase, and
: write examples in case a sense has examples. Four cases are
: distinguished: paraphrase and examples, paraphrase only, examples
: only, and neither (which yields the empty string).
:)
if ($synset/paraphrase and $examples)
then
{germanet:getExamplesForSynset($examples)}
else if ($synset/paraphrase)
then
else if ($examples)
then
{germanet:getExamplesForSynset($examples)}
else ""
}
{
(: Write SynsetRelations (conceptual relations) of a synset.
: There is at least one such relation for each synset, i.e., the
: relation to its hyperonym, except for the root synset GNROOT.
: When the synset has no conceptual relations the empty string is
: produced instead.
:)
if ($conRels)
then
{
(: Iterate through all conceptual relations of a synset. :)
for $conRel in $conRels
return
}
else ""
}
{
(: Encode a reference to the GermaNet database table with the
: corresponding database ID in a MonolingualExternalRef element.
:)
}
}