use util;

# Config file for acronym extraction. EDIT THIS FILE; it should
# not be overridden by the software. It is read by GSDL using perl's
# 'eval' function, so pretty much anything that is valid in perl is
# valid here.
#
# NOTE: every setting below must stay on its own line, separate from the
# comment that describes it -- a '#' comments out the rest of its line,
# so a statement that follows a comment on the same line is never run.

# Quite a few things here are defined in terms of recall and precision,
# which are the key measures from Information Retrieval (IR). If you
# don't understand recall and precision, any good IR textbook should
# explain them fully.

# the maximum range to look for acronyms (raise to raise precision)
$local_max_offset = 30;

# acronyms must be upper case (0 = false, 1 = true (high precision))
$local_upper_case = 1;

# acronym case must match (0 = false, 1 = true (high precision))
$local_case_match = 1;

# minimum definition length (raise to raise precision)
$local_min_def_length = 3;

# let definitions be all capitals
$local_allow_all_caps = 0;

# minimum acronym length (raise to raise precision)
$local_min_acro_length = 3;

# minimum acronym length saving (raise to raise precision)
$local_min_length_saving = 4;

# allow recursive acronyms (0 = false (high precision), 1 = true)
$local_allow_recursive = 0;

# stop words - words allowed in acronyms (the multi-lingual version
# slows down acronym extraction slightly so is not the default)
#@local_stop_words = split / /, "A OF AT THE IN TO AND VON BEI DER DIE DAS DEM DEN DES UND DE DU A LA LE LES L DANS ET S";
@local_stop_words = split / /, "OF AT THE IN TO AND";

# the file to collate acronyms into; built under the directory named by
# the GSDLCOLLECTDIR environment variable
$acronym_accumulate_file = util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "acronym_definitions.pm");

# any acronym definitions which should always be marked up can be copied here
# from the acronym_accumulate_file file ...
#
#
#