# Word Sketch Engine
# ver.1.0
#
# Sketch grammar for Finnish. Every rule is a CQL query over the "tag"
# attribute (see *DEFAULTATTR) in which the labels 1: and 2: mark the two
# related tokens. Tag names used below follow the shape <POS>_<Case>_<Number>
# (e.g. N_Nom_Sg) for nominals and e.g. V_Prs_Act_Sg3 for finite verbs.
#
# Changelog
# - Initial version. [13 April 2015, Jelena Kallas, Maria Khokhlova]
# - First modifications. [20 April 2015, Tarja Heinonen]
# - April 2015 version with some clarifications. [15 May 2015, Tarja Heinonen]
# - Review pass: one directive/name/query/comment per line restored; rule
#   defects corrected (each marked with "FIX:" next to the affected rule).
#
# NOTE(review): line breaks were reconstructed from a whitespace-collapsed
# copy of this file. Where a "#" comment ran into following text, the comment
# was assumed to cover exactly one commented-out rule -- confirm against the
# original if available.

*FIXORDER modifies Adj_modifier participle_modifier subject subject_of object object_of Gen_modifier Gen_modifies Part_modifier Part_modifies Adv_modifier V_modifies Adv_modifies Adj_modifies

*STRUCTLIMIT s
*DEFAULTATTR tag

# ---------------------------------------------------------------------------
# Case constructions (unary): which case forms a noun/adjective occurs in
# ---------------------------------------------------------------------------

*CONSTRUCTION
*UNARY
=Nominative
1:[tag="N_Nom.*"]
1:[tag="A_Nom.*"]

*CONSTRUCTION
*UNARY
=Genitive
1:[tag="N_Gen.*"]
1:[tag="A_Gen.*"]

*CONSTRUCTION
*UNARY
=Partitive
1:[tag="N_Par.*"]
1:[tag="A_Par.*"]

*CONSTRUCTION
*UNARY
=Inessive
1:[tag="N_Ine.*"]
1:[tag="A_Ine.*"]

*CONSTRUCTION
*UNARY
=Elative
1:[tag="N_Ela.*"]
1:[tag="A_Ela.*"]

*CONSTRUCTION
*UNARY
=Illative
1:[tag="N_Ill.*"]
1:[tag="A_Ill.*"]

*CONSTRUCTION
*UNARY
=Adessive
1:[tag="N_Ade.*"]
1:[tag="A_Ade.*"]

*CONSTRUCTION
*UNARY
=Ablative
1:[tag="N_Abl.*"]
1:[tag="A_Abl.*"]

*CONSTRUCTION
*UNARY
=Allative
1:[tag="N_All.*"]
1:[tag="A_All.*"]

*CONSTRUCTION
*UNARY
=Essive
1:[tag="N_Ess.*"]
1:[tag="A_Ess.*"]

*CONSTRUCTION
*UNARY
=Translative
1:[tag="N_Tra.*"]
1:[tag="A_Tra.*"]

# FIX: was "N_.Abe" / "A_.Abe" (misplaced dot, could never match N_Abe_Sg etc.)
*CONSTRUCTION
*UNARY
=Abessive
1:[tag="N_Abe.*"]
1:[tag="A_Abe.*"]

# FIX: was "N_.Com" / "A_.Com" (misplaced dot, could never match N_Com)
*CONSTRUCTION
*UNARY
=Comitative
1:[tag="N_Com.*"]
1:[tag="A_Com.*"]

*CONSTRUCTION
*UNARY
=Instructive
1:[tag="N_Ins.*"]
1:[tag="A_Ins.*"]

# ---------------------------------------------------------------------------
# Coordination: X ja/tai/vai Y, both members in the same case
# ---------------------------------------------------------------------------

*SYMMETRIC
=ja/tai/vai
2:[tag="N_Nom.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Nom.*"]
2:[tag="N_Gen.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Gen.*"]
2:[tag="N_Par.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Par.*"]
2:[tag="N_Tra.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Tra.*"]
2:[tag="N_Ess.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Ess.*"]
2:[tag="N_Ill.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Ill.*"]
2:[tag="N_Ine.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Ine.*"]
2:[tag="N_Ela.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Ela.*"]
2:[tag="N_All.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_All.*"]
2:[tag="N_Ade.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Ade.*"]
2:[tag="N_Abl.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Abl.*"]
2:[tag="N_Ins.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Ins.*"]
2:[tag="N_Com"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Com"]
2:[tag="N_Abe.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="N_Abe.*"]
2:[tag="A_Nom.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Nom.*"]
# FIX: the next two rules paired A_Gen/A_Par with N_Gen/N_Par on the right-hand
# side, unlike every other adjective rule in this relation.
2:[tag="A_Gen.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Gen.*"]
2:[tag="A_Par.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Par.*"]
2:[tag="A_Tra.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Tra.*"]
2:[tag="A_Ess.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Ess.*"]
2:[tag="A_Ill.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Ill.*"]
2:[tag="A_Ine.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Ine.*"]
2:[tag="A_Ela.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Ela.*"]
2:[tag="A_All.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_All.*"]
2:[tag="A_Ade.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Ade.*"]
2:[tag="A_Abl.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Abl.*"]
2:[tag="A_Ins.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Ins.*"]
2:[tag="A_Com"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Com"]
2:[tag="A_Abe.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="A_Abe.*"]
2:[tag="Adv"] [word="ja"|word="tai"|word="vai"] 1:[tag="Adv"]
2:[tag="V.*"] [word="ja"|word="tai"|word="vai"] 1:[tag="V.*"]
# ja, tai, vai; eli, tahi, taikka could be still added

# ---------------------------------------------------------------------------
# Comparison: X kuin Y
# ---------------------------------------------------------------------------
# NOTE(review): the leading negative context is quantified {0,3}; because zero
# repetitions are allowed it may not actually exclude "niin ... kuin" etc. --
# confirm the intended effect.

*SYMMETRIC
=kuin
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Nom.*"] [word="kuin"] 1:[tag="N_Nom.*"]
# FIX: the genitive rule used [word="ja"|word="tai"|word="vai"] instead of "kuin"
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Gen.*"] [word="kuin"] 1:[tag="N_Gen.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Par.*"] [word="kuin"] 1:[tag="N_Par.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Tra.*"] [word="kuin"] 1:[tag="N_Tra.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Ess.*"] [word="kuin"] 1:[tag="N_Ess.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Ill.*"] [word="kuin"] 1:[tag="N_Ill.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Ine.*"] [word="kuin"] 1:[tag="N_Ine.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Ela.*"] [word="kuin"] 1:[tag="N_Ela.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_All.*"] [word="kuin"] 1:[tag="N_All.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Ade.*"] [word="kuin"] 1:[tag="N_Ade.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Abl.*"] [word="kuin"] 1:[tag="N_Abl.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Ins.*"] [word="kuin"] 1:[tag="N_Ins.*"]
[lemma!="niin"&lemma!="enemmän"&lemma!="ennemminkin"&lemma!="enemmänkin"&lemma!="parempi"&tag!="Pron.*"]{0,3} 2:[tag="N_Com"] [word="kuin"] 1:[tag="N_Com"]
# adjective compared with a noun in the same case
[lemma!="yhtä"]{0,3} 2:[tag="A_Nom.*"] [word="kuin"] 1:[tag="N_Nom.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Gen.*"] [word="kuin"] 1:[tag="N_Gen.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Par.*"] [word="kuin"] 1:[tag="N_Par.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Tra.*"] [word="kuin"] 1:[tag="N_Tra.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Ess.*"] [word="kuin"] 1:[tag="N_Ess.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Ill.*"] [word="kuin"] 1:[tag="N_Ill.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Ine.*"] [word="kuin"] 1:[tag="N_Ine.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Ela.*"] [word="kuin"] 1:[tag="N_Ela.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_All.*"] [word="kuin"] 1:[tag="N_All.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Ade.*"] [word="kuin"] 1:[tag="N_Ade.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Abl.*"] [word="kuin"] 1:[tag="N_Abl.*"]
[lemma!="yhtä"]{0,3} 2:[tag="A_Ins.*"] [word="kuin"] 1:[tag="N_Ins.*"]
# FIX: this comitative rule was listed twice; the duplicate was removed.
# NOTE(review): the duplicate may have been meant as A_Abe/N_Abe -- confirm.
[lemma!="yhtä"]{0,3} 2:[tag="A_Com"] [word="kuin"] 1:[tag="N_Com"]

# ---------------------------------------------------------------------------
# Modifiers of nouns
# ---------------------------------------------------------------------------

# adjective + (adjective) + noun, agreeing in case
*DUAL
=Adj_modifier/modifies
2:[tag="A_Nom_Sg"] [tag="A_.*"]? 1:[tag="N_Nom_Sg"]
2:[tag="A_Nom_Pl"] [tag="A_.*"]? 1:[tag="N_Nom_Pl"]
2:[tag="A_Gen.*"] [tag="A_.*"]? 1:[tag="N_Gen.*"]
2:[tag="A_Par.*"] [tag="A_.*"]? 1:[tag="N_Par.*"]
2:[tag="A_Tra.*"] [tag="A_.*"]? 1:[tag="N_Tra.*"]
2:[tag="A_Ess.*"] [tag="A_.*"]? 1:[tag="N_Ess.*"]
2:[tag="A_Ill.*"] [tag="A_.*"]? 1:[tag="N_Ill.*"]
2:[tag="A_Ine.*"] [tag="A_.*"]? 1:[tag="N_Ine.*"]
2:[tag="A_Ela.*"] [tag="A_.*"]? 1:[tag="N_Ela.*"]
2:[tag="A_All.*"] [tag="A_.*"]? 1:[tag="N_All.*"]
2:[tag="A_Ade.*"] [tag="A_.*"]? 1:[tag="N_Ade.*"]
2:[tag="A_Abl.*"] [tag="A_.*"]? 1:[tag="N_Abl.*"]
2:[tag="A_Ins.*"] [tag="A_.*"]? 1:[tag="N_Ins.*"]
2:[tag="A_Com"] [tag="A_.*"]? 1:[tag="N_Com"]
2:[tag="A_Abe.*"] [tag="A_.*"]? 1:[tag="N_Abe.*"]

# non-verb participle + (genitive noun) + noun, agreeing in case
=participle_modifier
*COLLOC "%(2.lc)_%(1.lemma)-p"
# [lemma!="olla"&lemma!="ole"] 2:[tag=".*Prc.*Nom.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Nom.*"]
2:[tag=".*Prc.*Nom.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Nom.*"]
2:[tag=".*Prc.*Gen.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Gen.*"]
2:[tag=".*Prc.*Par.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Par.*"]
2:[tag=".*Prc.*Tra.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Tra.*"]
2:[tag=".*Prc.*Ess.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ess.*"]
# [lemma!="olla"&lemma!="ole"] 2:[tag=".*Prc.*Ine.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ine.*"]
2:[tag=".*Prc.*Ine_Sg"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ine_Sg"]
2:[tag=".*Prc.*Ine_Pl"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ine_Pl"]
2:[tag=".*Prc.*Ela.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ela.*"]
2:[tag=".*Prc.*Ill.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ill.*"]
2:[tag=".*Prc.*Ade.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ade.*"]
2:[tag=".*Prc.*Abl.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Abl.*"]
2:[tag=".*Prc.*All.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_All.*"]
2:[tag=".*Prc.*Ins.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Ins.*"]
2:[tag=".*Prc.*Com.*"&tag!="V.*"] [tag="N_Gen.*"]? 1:[tag="N_Com.*"]

# genitive noun + (adjective) + noun
=Gen_modifier
*COLLOC "%(2.lc)_%(1.lemma)-p"
2:[tag="N_Gen.*"][tag="A_.*"]? 1:[tag="N_.*"]

=Gen_modifies
*COLLOC "%(1.lc)_%(2.lemma)-p"
1:[tag="N_Gen.*"][tag="A_.*"]? 2:[tag="N_.*"]

# measure/quantity expression + (adjectives) + partitive noun
=Part_modifier
*COLLOC "%(1.lemma)_%(2.lc)-p"
1:[tag="N_.*"&(lemma="litra"|lemma="hehtaari"|lemma="tonni"|lemma="aste"|lemma="pari"|lemma="neljäsosa"|lemma="kolmasosa"|lemma="osa"|lemma="läjä"|lemma="kasa"|lemma="pino"|lemma="rivi"|lemma="annos"|lemma="lasi"|lemma="pullo"|lemma="kuppi"|lemma="ämpäri"|lemma="purkki"|lemma="tölkki"|lemma="arkki"|lemma="säkki"|lemma="valikoima"|lemma="rasia"|lemma="kimppu"|lemma="pussi"|lemma="paketti"|lemma="desilitra"|lemma="desi"|lemma="gramma"|lemma="kilo"|lemma="nokare"|lemma="tilkka"|lemma="tilkkanen")] [tag="A_.*"]{0,2} 2:[tag="N_Par.*"]
1:[lemma="kupillinen"|lemma="lasillinen"|lemma="pullollinen"|lemma="lusikallinen"|lemma="teelusikallinen"|lemma="ruokalusikallinen"|lemma="kassillinen"|lemma="kattilallinen"|lemma="kannullinen"|lemma="kauhallinen"|lemma="hyppysellinen"|lemma="ämpärillinen"|lemma="tölkillinen"] [tag="A_.*"]{0,2} 2:[tag="N_Par.*"]
1:[tag="Adv"&(lemma="paljon"|lemma="vähän"|lemma="liikaa"|word="lisää")] [tag="A_.*"]? 2:[tag="N_Par.*"]
1:[word="runsaasti"|word="rutkasti"|word="reilusti"|word="reippaasti"|word="melkoisesti"|word="tuhottomasti"|word="valtavasti"|word="laajasti"|word="kattavasti"|word="kovasti"|word="helvetisti"|word="hemmetisti"|word="vitusti"] [tag="A_.*"]? 2:[tag="N_Par.*"]

=Part_modifies
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="N_.*"&(lemma="litra"|lemma="hehtaari"|lemma="tonni"|lemma="aste"|lemma="pari"|lemma="neljäsosa"|lemma="kolmasosa"|lemma="osa"|lemma="läjä"|lemma="kasa"|lemma="pino"|lemma="rivi"|lemma="annos"|lemma="lasi"|lemma="pullo"|lemma="kuppi"|lemma="ämpäri"|lemma="purkki"|lemma="tölkki"|lemma="arkki"|lemma="säkki"|lemma="valikoima"|lemma="rasia"|lemma="kimppu"|lemma="pussi"|lemma="paketti"|lemma="desilitra"|lemma="desi"|lemma="gramma"|lemma="kilo"|lemma="nokare"|lemma="tilkka"|lemma="tilkkanen")][tag="A_.*"]{0,2} 1:[tag="N_Par.*"]
2:[lemma="kupillinen"|lemma="lasillinen"|lemma="pullollinen"|lemma="lusikallinen"|lemma="teelusikallinen"|lemma="ruokalusikallinen"|lemma="kassillinen"|lemma="kattilallinen"|lemma="kannullinen"|lemma="kauhallinen"|lemma="hyppysellinen"|lemma="ämpärillinen"|lemma="tölkillinen"] [tag="A_.*"]{0,2} 1:[tag="N_Par.*"]
2:[tag="Adv"&(lemma="paljon"|lemma="vähän"|lemma="liikaa"|word="lisää")] [tag="A_.*"]? 1:[tag="N_Par.*"]
2:[word="runsaasti"|word="rutkasti"|word="reilusti"|word="reippaasti"|word="melkoisesti"|word="tuhottomasti"|word="valtavasti"|word="laajasti"|word="kattavasti"|word="kovasti"|word="helvetisti"|word="hemmetisti"|word="vitusti"] [tag="A_.*"]? 1:[tag="N_Par.*"]

# pronoun + (adjective) + noun, agreeing in case
*DUAL
=Pron_modifier/modifies
2:[tag="Pron_.*_Nom_Sg"] [tag="A_.*"]? 1:[tag="N_Nom_Sg"]
2:[tag="Pron_.*_Nom_Pl"] [tag="A_.*"]? 1:[tag="N_Nom_Pl"]
2:[tag="Pron_.*_Gen.*"] [tag="A_.*"]? 1:[tag="N_Gen.*"]
2:[tag="Pron_.*_Par.*"] [tag="A_.*"]? 1:[tag="N_Par.*"]
2:[tag="Pron_.*_Tra.*"] [tag="A_.*"]? 1:[tag="N_Tra.*"]
2:[tag="Pron_.*_Ess.*"] [tag="A_.*"]? 1:[tag="N_Ess.*"]
2:[tag="Pron_.*_Ill.*"] [tag="A_.*"]? 1:[tag="N_Ill.*"]
2:[tag="Pron_.*_Ine.*"] [tag="A_.*"]? 1:[tag="N_Ine.*"]
2:[tag="Pron_.*_Ela.*"] [tag="A_.*"]? 1:[tag="N_Ela.*"]
2:[tag="Pron_.*_All.*"] [tag="A_.*"]? 1:[tag="N_All.*"]
2:[tag="Pron_.*_Ade.*"] [tag="A_.*"]? 1:[tag="N_Ade.*"]
2:[tag="Pron_.*_Abl.*"] [tag="A_.*"]? 1:[tag="N_Abl.*"]
2:[tag="Pron_.*_Ins.*"] [tag="A_.*"]? 1:[tag="N_Ins.*"]
2:[tag="Pron_.*_Com"] [tag="A_.*"]? 1:[tag="N_Com"]
# do not produce necessarily interesting results 2:[tag="Pron_.*_Gen.*"] etc.
# perhaps you could try other way round, too: what pronouns combine with
# so that you switch the numbers 2-> 1, and 1-> 2.

# numeral + (adjective) + noun
*DUAL
=Num_modifier/modifies
# FIX: was 1:[tag="N_Nom_Par"], which is not a tag of the N_<Case>_<Number>
# shape used elsewhere; a nominative singular cardinal takes a partitive
# singular noun (kaksi taloa).
2:[tag="Num_Nom_Sg"] 1:[tag="N_Par_Sg"]
2:[tag="Num_Ord_Nom_Sg"] [tag="A_.*"]? 1:[tag="N_Nom_Sg"]
2:[tag="Num_.*Nom_Pl"] [tag="A_.*"]? 1:[tag="N_Nom_Pl"]
2:[tag="Num_.*Gen.*"] [tag="A_.*"]? 1:[tag="N_Gen.*"]
2:[tag="Num_.*Par.*"] [tag="A_.*"]? 1:[tag="N_Par.*"]
2:[tag="Num_.*Tra.*"] [tag="A_.*"]? 1:[tag="N_Tra.*"]
2:[tag="Num_.*Ess.*"] [tag="A_.*"]? 1:[tag="N_Ess.*"]
2:[tag="Num_.*Ill.*"] [tag="A_.*"]? 1:[tag="N_Ill.*"]
2:[tag="Num_.*Ine.*"] [tag="A_.*"]? 1:[tag="N_Ine.*"]
2:[tag="Num_.*Ela.*"] [tag="A_.*"]? 1:[tag="N_Ela.*"]
2:[tag="Num_.*All.*"] [tag="A_.*"]? 1:[tag="N_All.*"]
2:[tag="Num_.*Ade.*"] [tag="A_.*"]? 1:[tag="N_Ade.*"]
2:[tag="Num_.*Abl.*"] [tag="A_.*"]? 1:[tag="N_Abl.*"]
2:[tag="Num_.*Ins.*"] [tag="A_.*"]? 1:[tag="N_Ins.*"]
2:[tag="Num_Com"] [tag="A_.*"]? 1:[tag="N_Com"]

# proper noun immediately followed by a noun
=Proper noun_modifier
*COLLOC "%(2.lc)_%(1.lemma)-p"
2:[tag="N_Prop_.*"] 1:[tag="N_.*"]

=Proper noun_modifies
*COLLOC "%(1.lc)_%(2.lemma)-p"
1:[tag="N_Prop_.*"] 2:[tag="N_.*"]

# ---------------------------------------------------------------------------
# Infinitive and clause complements of nouns and adjectives
# ---------------------------------------------------------------------------

=N_Inf1
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="Adv"]? 1:[tag="N.*"] 2:[tag="V_Inf1_Lat"]
[lemma="saada"] [tag="Adv"]? 1:[tag="N.*"&(lemma="lupa"|lemma="kunnia")] 2:[tag="V_Inf1_Lat"]
# on lupa tehdä jotakin; saada kunnian tehdä jotakin

=N_Inf3
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="Adv"]? 1:[lemma="mestari"|lemma="haka"] 2:[tag="V_Inf3_Ill"]
# on mestari tekemään jotakin

=A_Inf1
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="Adv"]? 1:[tag="A_.*"] 2:[tag="V_Inf1_Lat"]
# on hauska/vaikeaa lukea etc.

=A_Inf3
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="Adv"]? 1:[tag="A_.*"|word="tervetullut"|word="tervetulleita"|word="jäävejä"] 2:[tag="V_Inf3_Ill"]
# on nopea lukemaan + ovat jäävejä tekemään jotakin
# jäävejä, tervetulleita misanalyzed, tervetullut PrPrc or N

=N_Relcl
*COLLOC "%(2.lc)-p"
1:[tag="N.*"] [tag="Punct"]? 2:[tag="Pron_Rel.*"]
# poika, joka (comma may be missing)

=N_Complcl
*COLLOC "%(2.lc)-p"
[lemma="olla"] 1:[tag="N.*"] [tag="Punct"]? 2:[lemma="että"]
# on ihme, että (comma may be missing)

=N_Questcl
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="A_.*"]? 1:[tag="N_Nom.*"] [tag="Punct"]? 2:[tag="V.*"&(word=".*ko"|word=".*kö")&word!="eikö"]
[lemma="olla"] [tag="A_.*"]? 1:[tag="N_Par.*"] [tag="Punct"]? 2:[tag="V.*"&(word=".*ko"|word=".*kö")&word!="eikö"]
# kysymys, onko (comma may be missing); eikö-tag question excluded; still mishits
#[lemma="olla"] [tag="A_.*"]? 1:[tag="N_Nom.*"] [tag="Punct"]? 2:[lemma="kuka"]
#1:[tag="N.*"&(lemma="kysymys"|lemma="epäilys"|lemma="epäselvyys"|lemma="selvyys"|lemma="varmuus"|lemma="tieto"|lemma="ongelma"|lemma="tutkimus"|lemma="selvitys"|lemma="pohdinta")] [tag="Punct"]? 2:[lemma="mikä"|lemma="kuka"]
# kysymys, mikä (comma may be missing); overlaps with N_Relcl with mikä-clauses
# this is not ok yet

# ---------------------------------------------------------------------------
# Case-marked and adpositional complements of nouns
# ---------------------------------------------------------------------------

=N_Elative
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="A_.*"]? 1:[tag="N_Nom.*"&lemma!="aika"&lemma!="pakko"] 2:[tag="N_Ela.*"&lemma!="mieli"&lemma!="alku"]
[lemma="olla"] [lemma="taas"]?[lemma="se"]? 1:[tag="N_Nom.*"&lemma="aika"] 2:[tag="N_Ela.*"&lemma="vuosi"]
# on kertomus miehestä; expressions like "aika perseestä" excluded since "aika" often misanalyzed as a noun

=N_Illative
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="A_.*"]? 1:[tag="N_Nom.*"] 2:[tag="N_Ill.*"&word!="elämääni"&word!="matkaani"] [tag!="V_Inf2.*"&tag!="P.*Prc.*"]
# on oikeus elämään; the last part excludes the hits that belong to a following non-finite clause

=N_Allative
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="A_.*"]? 1:[tag="N_Nom.*"] 2:[tag="N_All.*"] [tag!="V_Inf2.*"&tag!="P.*Prc.*"]
# uhka tulevaisuudelle; the last part excludes the hits that belong to a following non-finite clause

=N_PostP
*COLLOC "%(2.lc)-p"
2:[tag="N_Gen.*"] 1:[tag="Adv|Adp_Po"]

=PreP_N
*COLLOC "%(2.lc)-p"
2:[tag="Adp_Pr"&lemma!="sitten"] 1:[tag="N_Par.*"]
2:[tag="Adv"&lemma="ennen"] 1:[tag="N_Par.*"]

# ---------------------------------------------------------------------------
# Verb arguments
# ---------------------------------------------------------------------------

*DUAL
=subject/subject_of
1:[tag="N_Nom.*"] [tag="Adv"]? 2:[tag="V.*Act.*3"&lemma!="olla"&lemma!="ei"&lemma!="pitää"&lemma!="voida"&lemma!="saada"&lemma!="saattaa"]
# basic word order; uninteresting verbs excluded
# FIX: only word=".*ko" was matched; -kö added, as in the other question rules
2:[tag="V.*Act.*3"&(word=".*ko"|word=".*kö")&lemma!="olla"&lemma!="ei"&lemma!="pitää"&lemma!="voida"&lemma!="saada"&lemma!="saattaa"] [tag="A_.*"]? 1:[tag="N_Nom.*"]
# question with -ko/-kö
# FIX: tag value was "V.*Act.*3sg&(lemma=... -- the closing quote after the tag
# regex was missing (syntax error), and "3sg" cannot match tags written like
# V_Prs_Act_Sg3.
2:[tag="V.*Act.*Sg3"&(lemma="tulla"|lemma="kasvaa"|lemma="näkyä"|lemma="kuulua"|lemma="liikkua"|lemma="syntyä")] [tag="A_.*"]? 1:[tag="N_Par.*"&word!="syötyä"&word!="....*ttua"&word!="....*ttyä"]
# tulee tavoitteita etc.; not: tulee syötyä, valittua etc.
# syötyä misanalyzed morphologically, excluded

*DUAL
=object/object_of
2:[tag="V.*"&lemma!="olla"&lemma!="ei"] [tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Par.*"]
# verb, not "olla" or "ei" with a NP in the partitive case
2:[tag="V_Imprt.*2"&lemma!="olla"&lemma!="ei"] [tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Nom.*"]
# verb in the imperative with a NP in the nominative case
2:[tag="V.*Pass.*"&lemma!="olla"&lemma!="ei"] [tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Nom.*"&lemma!="läpi"]
# verb in the passive voice with a NP in the nominative case; läpi almost always not a noun
2:[tag="V.*"&lemma!="olla"&lemma!="ei"] [tag="Adv"&lemma!="alle"&lemma!="alta"&lemma!="yli"&lemma!="läpi"]? [tag="A_.*"]? 1:[tag="N_Gen.*"] [tag="SENT"]
# verb, not "olla" or "ei" with a NP in the genitive case at the end of sentence
# not a real rule, just something to pick verbs with genitive objects
2:[tag="V.*"&lemma!="olla"&lemma!="ei"] [tag="Adv"&lemma!="alle"&lemma!="alta"&lemma!="yli"&lemma!="läpi"]? [tag="A_.*"]? 1:[tag="N_Gen.*"] [tag="Punct"] [tag!="N_Gen.*"]
# verb with a NP in the genitive case before a punctuation mark and so that another genitive NP is not following
2:[tag="V.*"&lemma!="olla"&lemma!="ei"] [tag="Adv"&lemma!="alle"&lemma!="alta"&lemma!="yli"&lemma!="läpi"]? [tag="A_.*"]? 1:[tag="N_Gen.*"] [tag="Pron_Rel.*"|tag="CS"&lemma!="vaan"|word="ettei"|word="etten"|word="vaikkei"|word="ellei"|word="jossei"]
# verb with a NP in the genitive case and a subordinate clause without a punctuation mark (a common mistake)
# SUMMARY:
#v+(adj)+partitive case;
#V(imperative/passive)+(adj)+nominative case;
#v+(adj)+genitive case: forced sentence/clause ending and tried to exclude the structure with co-ordinated genitive modifiers
# notice that conjunction+negative verb blends like "ettei" are all misanalyzed morphologically, the best analysis is as V_Prs_Act_Sg3 1469 times out of 31362,
# also colloquial forms like "vaik", "mut" are missing from conjunctions (mut is difficult because it is also colloquial form for "minut")

# ---------------------------------------------------------------------------
# Adverb modifiers (frequent, uninformative adverbs excluded)
# ---------------------------------------------------------------------------

*DUAL
=Adv_modifier/V_modifies
1:[tag="V.*"&lemma!="olla"&lemma!="ei"] 2:[tag="Adv"&lemma!="myös"&lemma!="jo"&lemma!="kuitenkin"&lemma!="vielä"&lemma!="ihan"&lemma!="aina"&lemma!="vain"&lemma!="kyllä"&lemma!="siis"&lemma!="joskus"&lemma!="taas"&lemma!="nyt"&lemma!="heti"&lemma!="todella"]

*DUAL
=Adv_modifier/Adv_modifies
2:[tag="Adv"&lemma!="myös"&lemma!="jo"&lemma!="kuitenkin"&lemma!="vielä"&lemma!="ihan"&lemma!="aina"&lemma!="vain"&lemma!="kyllä"&lemma!="siis"&lemma!="joskus"&lemma!="taas"&lemma!="nyt"&lemma!="heti"&lemma!="todella"] 1:[tag="Adv"]

*DUAL
=Adv_modifier/Adj_modifies
# FIX: the participle token had no "1:" label, so the rule had only one
# labelled position.
2:[tag="Adv"&lemma!="myös"&lemma!="jo"&lemma!="kuitenkin"&lemma!="vielä"&lemma!="ihan"&lemma!="aina"&lemma!="vain"&lemma!="kyllä"&lemma!="siis"&lemma!="joskus"&lemma!="taas"&lemma!="nyt"&lemma!="heti"&lemma!="todella"] 1:[tag=".*Prc.*Nom.*"&tag!="V.*"]
2:[tag="Adv"&lemma!=".*sti"&lemma!="myös"&lemma!="jo"&lemma!="kuitenkin"&lemma!="vielä"&lemma!="ihan"&lemma!="aina"&lemma!="vain"&lemma!="kyllä"&lemma!="siis"&lemma!="joskus"&lemma!="taas"&lemma!="nyt"&lemma!="heti"&lemma!="todella"] 1:[tag="A_.*"]

# ---------------------------------------------------------------------------
# Predicate adjectives
# ---------------------------------------------------------------------------

*DUAL
=predicate_A/predicate_A_of
1:[tag="N_Nom.*"][tag="V.*"&(lemma="voida"|lemma="pitää"|lemma="täytyä"|lemma="osata"|lemma="saada"|lemma="haluta"|lemma="tahtoa"|lemma="aikoa")]? [lemma="olla"][tag="Adv"]? 2:[tag="A_Nom.*"] [tag!="N.*"]
1:[tag="N_Nom.*"] [tag="V.*"&(lemma="voida"|lemma="pitää"|lemma="täytyä"|lemma="osata"|lemma="saada"|lemma="haluta"|lemma="tahtoa"|lemma="aikoa")]? [lemma="olla"] [tag="Adv"]? 2:[tag="A_Par.*"][tag!="N.*"]
# adjectives as predicate complements
# FIX: in the two question-order rules below the noun had no "1:" label.
[lemma="olla"&(word=".*ko"|word=".*kö")] 1:[tag="N_Nom.*"][tag="Adv"]? 2:[tag="A_Nom.*"] [tag!="N.*"]
[lemma="olla"&(word=".*ko"|word=".*kö")] 1:[tag="N_Nom.*"][tag="Adv"]? 2:[tag="A_Par.*"][tag!="N.*"]

# ---------------------------------------------------------------------------
# Infinitive complements of verbs
# ---------------------------------------------------------------------------

=V_Inf1
*COLLOC "%(2.lc)-p"
1:[tag="V.*"] 2:[tag="V_Inf1_Lat"]

=V_Inf3_Ine
*COLLOC "%(2.lc)-p"
1:[tag="V.*"] 2:[tag="V_Inf3_Ine"]

=V_Inf3_Ill
*COLLOC "%(2.lc)-p"
1:[tag="V.*"] 2:[tag="V_Inf3_Ill"]

=V_Inf3_Abe
*COLLOC "%(2.lc)-p"
1:[tag="V.*"] 2:[tag="V_Inf3_Abe"]

# ---------------------------------------------------------------------------
# Case-marked adverbials: verb (not "olla") + (adverb) + (adjective) + noun
# ---------------------------------------------------------------------------

=adverbial_Inessive
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Ine.*"]

=adverbial_Inessive_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Ine.*"]

=adverbial_Elative
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Ela.*"]

=adverbial_Elative_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Ela.*"]

=adverbial_Illative
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Ill.*"]

=adverbial_Illative_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Ill.*"]

# FIX: the two Adessive names contained stray spaces ("adverbial_ Adessive",
# "adverbial_Adessive _of"), unlike all sibling relations.
=adverbial_Adessive
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Ade.*"]

=adverbial_Adessive_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Ade.*"]

=adverbial_Ablative
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Abl.*"]

=adverbial_Ablative_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Abl.*"]

=adverbial_Allative
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_All.*"]

=adverbial_Allative_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_All.*"]

=adverbial_Essive
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Ess.*"]

=adverbial_Essive_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Ess.*"]

=adverbial_Translative
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Tra.*"]

=adverbial_Translative_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Tra.*"]

=adverbial_Abessive
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Abe.*"]

=adverbial_Abessive_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Abe.*"]

=adverbial_Comitative
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Com.*"]

=adverbial_Comitative_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Com.*"]

=adverbial_Instructive
*COLLOC "%(2.lc)-p"
1:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 2:[tag="N_Ins.*"]

=adverbial_Instructive_of
*COLLOC "%(2.lemma)_%(1.lc)-p"
2:[tag="V_.*"&lemma!="olla"][tag="Adv"]? [tag="A_.*"]? 1:[tag="N_Ins.*"]

# ---------------------------------------------------------------------------
# mini-expressions
# ---------------------------------------------------------------------------

=olla_Inessive
*COLLOC "%(2.lc)-p"
[lemma="olla"] [tag="Adv"]? 2:[word="menossa"|word="tulossa"|word="lähdössä"|word="matkalla"] 1:[tag="N_.*"]
# at least of two types: on tulossa kotiin; on tulossa muutos; the formalism is not checked yet