root/trunk/components/server/src/ome/services/fulltext/BridgeHelper.java
| Revision 2804, 9.7 kB (checked in by jmoore, 4 months ago) |
|---|
| Line | |
|---|---|
| 1 | /* |
| 2 | * $Id$ |
| 3 | * |
| 4 | * Copyright 2008 Glencoe Software, Inc. All rights reserved. |
| 5 | * Use is subject to license terms supplied in LICENSE.txt |
| 6 | */ |
| 7 | |
| 8 | package ome.services.fulltext; |
| 9 | |
| 10 | import java.io.File; |
| 11 | import java.io.Reader; |
| 12 | import java.io.StringReader; |
| 13 | import java.util.Collections; |
| 14 | import java.util.List; |
| 15 | import java.util.Map; |
| 16 | |
| 17 | import ome.conditions.ApiUsageException; |
| 18 | import ome.io.nio.OriginalFilesService; |
| 19 | import ome.model.IObject; |
| 20 | import ome.model.core.OriginalFile; |
| 21 | import ome.services.messages.RegisterServiceCleanupMessage; |
| 22 | import ome.services.messages.ReindexMessage; |
| 23 | |
| 24 | import org.apache.commons.logging.Log; |
| 25 | import org.apache.commons.logging.LogFactory; |
| 26 | import org.apache.lucene.document.Document; |
| 27 | import org.apache.lucene.document.Field; |
| 28 | import org.apache.lucene.document.Field.Store; |
| 29 | import org.hibernate.search.bridge.FieldBridge; |
| 30 | import org.springframework.context.ApplicationEventPublisher; |
| 31 | import org.springframework.context.ApplicationEventPublisherAware; |
| 32 | |
| 33 | /** |
| 34 | * Base class for building custom {@link FieldBridge} implementations. |
| 35 | * |
| 36 | * To force handling of null values, the |
| 37 | * {@link #add(Document, String, String, Store, org.apache.lucene.document.Field.Index, Float)} |
| 38 | * methods throw {@link NullValueException} which can convert itself to a |
| 39 | * {@link RuntimeException} via {@link NullValueException#convert(Object)} if |
| 40 | * that is the simplest course of action. Alternatively, you could re-add the |
| 41 | * value with a null-token like "null". |
| 42 | * |
| 43 | * @author Josh Moore, josh at glencoesoftware.com |
| 44 | * @since 3.0-Beta3 |
| 45 | */ |
| 46 | public abstract class BridgeHelper implements FieldBridge, |
| 47 | ApplicationEventPublisherAware { |
| 48 | |
| 49 | /** |
| 50 | * Name of the {@link Field} which contains the union of all fields. This is |
| 51 | * also the default search field, so users need not append the value to |
| 52 | * search the full index. A field name need only be added to a search to |
| 53 | * eliminate other fields. |
| 54 | * |
| 55 | * @DEV.TODO add to constants |
| 56 | */ |
| 57 | public final static String COMBINED = "combined_fields"; |
| 58 | |
| 59 | private final Log log = LogFactory.getLog(getClass()); |
| 60 | |
| 61 | protected ApplicationEventPublisher publisher; |
| 62 | |
| 63 | public final Log logger() { |
| 64 | return log; |
| 65 | } |
| 66 | |
| 67 | public void setApplicationEventPublisher(ApplicationEventPublisher publisher) { |
| 68 | this.publisher = publisher; |
| 69 | } |
| 70 | |
| 71 | /** |
| 72 | * Method to be implemented by all {@link FieldBridge bridges}. The "value" |
| 73 | * argument is an active Hibernate object, and so the full graph can be |
| 74 | * walked. |
| 75 | */ |
| 76 | public abstract void set(final String name, final Object value, |
| 77 | final Document document, final Field.Store store2, |
| 78 | final Field.Index index, final Float boost); |
| 79 | |
| 80 | /** |
| 81 | * Helper method which takes the parameters from the |
| 82 | * {@link #set(String, Object, Document, Store, org.apache.lucene.document.Field.Index, Float)} |
| 83 | * method (possibly modified) as well as the parsed {@link String} value |
| 84 | * which should be added to the index, and adds two fields. One with the |
| 85 | * given field name and another to the {@link #COMBINED} field which is the |
| 86 | * default search provided to users. In addition to storing the value as is, |
| 87 | * another {@link Field} will be added for both the named and |
| 88 | * {@link #COMBINED} cases using a {@link StringReader} to allow Lucene to |
| 89 | * tokenize the {@link String}. |
| 90 | * |
| 91 | * @param d |
| 92 | * Document as passed to the set method. Do not modify. |
| 93 | * @param field |
| 94 | * Field name which probably <em/>should</em> be modified. If |
| 95 | * this value is null, then the "value" will only be added to the |
| 96 | * {@link #COMBINED} field. |
| 97 | * @param value |
| 98 | * Value which has been parsed out for this field. Should |
| 99 | * <em/>not</em> be null. If you need to store a null value in |
| 100 | * the index, use a null token like "null". |
| 101 | * @param store |
| 102 | * Whether or not to store the string value in the index. Note: |
| 103 | * no values are stored in the {@link #COMBINED} field to prevent |
| 104 | * duplication. |
| 105 | * @param index |
| 106 | * Whether or not to make the string searchable. |
| 107 | * @param boost |
| 108 | * Positive float which increases or decreases search importance |
| 109 | * for a field. Default is 1.0. |
| 110 | */ |
| 111 | protected void add(Document d, String field, String value, |
| 112 | Field.Store store, Field.Index index, Float boost) { |
| 113 | |
| 114 | if (value == null) { |
| 115 | throw new RuntimeException( |
| 116 | "Value for indexing cannot be null. Use a null token instead."); |
| 117 | } |
| 118 | |
| 119 | // If the field == null, then we ignore it, to allow easy addition |
| 120 | // of Fields as COMBINED |
| 121 | if (field != null) { |
| 122 | final Field named_field = new Field(field, value, store, index); |
| 123 | if (boost != null) { |
| 124 | named_field.setBoost(boost); |
| 125 | } |
| 126 | d.add(named_field); |
| 127 | final Field named_parsed_field = new Field(field, new StringReader( |
| 128 | value)); |
| 129 | d.add(named_parsed_field); |
| 130 | } |
| 131 | |
| 132 | // Never storing in combined fields, since it's duplicated |
| 133 | final Field combined_field = new Field(COMBINED, value, Store.NO, index); |
| 134 | if (boost != null) { |
| 135 | combined_field.setBoost(boost); |
| 136 | } |
| 137 | d.add(combined_field); |
| 138 | final Field combined_parsed_field = new Field(COMBINED, |
| 139 | new StringReader(value)); |
| 140 | d.add(combined_parsed_field); |
| 141 | } |
| 142 | |
| 143 | /** |
| 144 | * Second helper method used when parsing files. The {@link OriginalFile} |
| 145 | * will be passed to {@link #parse(OriginalFile, OriginalFilesService, Map)} |
| 146 | * to generate {@link Reader} instances, which will be read until they |
| 147 | * signal an end, however it is not the responsibility of this instance to |
| 148 | * close the Readers since this happens asynchronously. |
| 149 | * |
| 150 | * The contents of the file will be parsed both to {@link #COMBINED} and |
| 151 | * "file.contents". |
| 152 | * |
| 153 | * @param d |
| 154 | * {@link Document} as passed to set. Do not modify. |
| 155 | * @params name String to be used as the name of the field. If null, then |
| 156 | * the contens will only be added to the {@link #COMBINED} |
| 157 | * {@link Field}. |
| 158 | * @param file |
| 159 | * Non-null, possibly unloaded {@link OriginalFile} which is used |
| 160 | * to look up the file on disk. |
| 161 | * @param files |
| 162 | * {@link OriginalFileServer} which knows how to find where this |
| 163 | * {@link OriginalFile} is stored on disk. |
| 164 | * @param parsers |
| 165 | * {@link Map} of {@link FileParser} instances to be used based |
| 166 | * on the {@link Format} of the {@link OriginalFile} |
| 167 | * @param boost |
| 168 | * Positive float which increases or decreases search importance |
| 169 | * for a field. Default is 1.0. |
| 170 | */ |
| 171 | protected void addContents(final Document d, final String name, |
| 172 | final OriginalFile file, final OriginalFilesService files, |
| 173 | final Map<String, FileParser> parsers, final Float boost) { |
| 174 | |
| 175 | if (file == null) { |
| 176 | throw new RuntimeException( |
| 177 | "File cannot be null. Either do not attempt to add " |
| 178 | + "anything for this field, or use a null token like " |
| 179 | + "\"null\" instead."); |
| 180 | } |
| 181 | |
| 182 | Field f; |
| 183 | if (name != null) { |
| 184 | for (Reader parsed : parse(file, files, parsers)) { |
| 185 | f = new Field(name, parsed); |
| 186 | if (boost != null) { |
| 187 | f.setBoost(boost); |
| 188 | } |
| 189 | d.add(f); |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | for (Reader parsed : parse(file, files, parsers)) { |
| 194 | f = new Field(COMBINED, parsed); |
| 195 | if (boost != null) { |
| 196 | f.setBoost(boost); |
| 197 | } |
| 198 | d.add(f); |
| 199 | } |
| 200 | |
| 201 | } |
| 202 | |
| 203 | /** |
| 204 | * Publishes a {@link ReindexMessage} which will get processed |
| 205 | * asynchronously. |
| 206 | */ |
| 207 | protected <T extends IObject> void reindex(T object) { |
| 208 | reindexAll(Collections.singletonList(object)); |
| 209 | } |
| 210 | |
| 211 | /** |
| 212 | * Publishes a {@link ReindexMessage} which will get processed |
| 213 | * asynchronously. |
| 214 | */ |
| 215 | protected <T extends IObject> void reindexAll(List<T> list) { |
| 216 | if (publisher == null) { |
| 217 | throw new ApiUsageException( |
| 218 | "Bridge is not configured for sending messages."); |
| 219 | } |
| 220 | for (T object : list) { |
| 221 | if (object == null || object.getId() == null) { |
| 222 | throw new ApiUsageException("Object cannot be null"); |
| 223 | } |
| 224 | } |
| 225 | final ReindexMessage<T> rm = new ReindexMessage<T>(this, list); |
| 226 | publisher.publishEvent(rm); |
| 227 | } |
| 228 | |
| 229 | /** |
| 230 | * Attempts to parse the given {@link OriginalFile}. If any of the |
| 231 | * necessary components is null, then it will return an empty, but not null |
| 232 | * {@link Iterable}. Also looks for the catch all parser under "*" |
| 233 | * |
| 234 | * @param file |
| 235 | * Can be null. |
| 236 | * @return will not be null. |
| 237 | */ |
| 238 | protected Iterable<Reader> parse(final OriginalFile file, |
| 239 | final OriginalFilesService files, |
| 240 | final Map<String, FileParser> parsers) { |
| 241 | if (files != null && parsers != null) { |
| 242 | if (file != null && file.getFormat() != null) { |
| 243 | String path = files.getFilesPath(file.getId()); |
| 244 | String format = file.getFormat().getValue(); |
| 245 | FileParser parser = parsers.get(format); |
| 246 | if (parser != null) { |
| 247 | return parser.parse(new File(path)); |
| 248 | } else { |
| 249 | parser = parsers.get("*"); |
| 250 | if (parser != null) { |
| 251 | return parser.parse(new File(path)); |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | return FileParser.EMPTY; |
| 257 | } |
| 258 | } |
Note: See TracBrowser
for help on using the browser.
