/* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * This file provides an experimental interface for a Full Text Search
 * Engine.  It currently only supports indexing, and can spawn
 * a Java based program using Lucene to do the indexing.  The long
 * term goal is to integrate with Lucene4c, and avoid using the Java
 * interface at all.
 */

#include <stdio.h>              /* fprintf(), stderr */

#include "apr.h"
#include "apr_strings.h"
#include "apr_thread_proc.h"

#include "mbox_search.h"

/**
 * Create an indexer context and open an index writer on @a path.
 *
 * Two sub-pools are created: ctx->pool (child of @a p) holds everything
 * that lives as long as the indexer, and ctx->tpool (child of ctx->pool)
 * is scratch space that mbox_indexer_add() clears after every document.
 *
 * @param ctx_  Receives the new context on success; untouched on failure.
 * @param path  Location of the index; a copy is kept in ctx->path.
 * @param p     Parent pool for the context and its sub-pools.
 * @return APR_SUCCESS on success, the apr_pool_create() status if a
 *         sub-pool cannot be created, APR_EGENERAL if the analyzer or the
 *         index writer cannot be created, or APR_ENOTIMPL when built with
 *         NO_MBOX_SEARCH.
 */
apr_status_t mbox_indexer_init(mbox_indexer_t** ctx_, const char* path,
                               apr_pool_t* p)
{
#ifdef NO_MBOX_SEARCH
    return APR_ENOTIMPL;
#else
    lcn_error_t *err;
    mbox_indexer_t *ctx;
    apr_status_t rv;

    /* Zero the context so that no member is ever read uninitialised. */
    ctx = apr_pcalloc(p, sizeof(*ctx));

    rv = apr_pool_create(&ctx->pool, p);
    if (rv != APR_SUCCESS) {
        return rv;
    }

    rv = apr_pool_create(&ctx->tpool, ctx->pool);
    if (rv != APR_SUCCESS) {
        apr_pool_destroy(ctx->pool);
        return rv;
    }

    ctx->path = apr_pstrdup(ctx->pool, path);

    lcn_init();
    lcn_thread_attach();

    err = lcn_analyzer_standard_create(&ctx->analyzer, ctx->pool);
    if (err) {
        fprintf (stderr, "error creating analyzer: %s\n", err->message);
        goto fail;
    }

    err = lcn_index_writer_create(&ctx->writer, path, ctx->analyzer,
                                  ctx->pool);
    if (err) {
        fprintf (stderr, "error creating index: %s\n", err->message);
        goto fail;
    }

    /* TODO: Search the index for existing documents
    err = lcn_index_searcher_open (&ctx->searcher, path, ctx->pool);
    if (err) {
        fprintf (stderr, "error opening searcher... %s\n", err->message);
        return APR_EGENERAL;
    }
    */

    *ctx_ = ctx;

    return APR_SUCCESS;

fail:
    /* Undo what was set up above, in the same order mbox_indexer_close()
     * uses: release the pools first, then detach the thread.
     * NOTE(review): assumes lcn_thread_detach() is the correct counterpart
     * of lcn_thread_attach() on a failed start-up as well -- confirm. */
    apr_pool_destroy(ctx->pool);
    lcn_thread_detach();
    return APR_EGENERAL;
#endif
}

/**
 * Optimize the index.  Currently a no-op that reports success.
 *
 * @param ctx  Indexer context (unused).
 * @return APR_SUCCESS, or APR_ENOTIMPL when built with NO_MBOX_SEARCH.
 */
apr_status_t mbox_indexer_optimize(mbox_indexer_t* ctx)
{
#ifdef NO_MBOX_SEARCH
    return APR_ENOTIMPL;
#else
    /* not supported by Lucene4c yet */
    return APR_SUCCESS;
#endif
}

/**
 * Close the index writer, clear the indexer's pool and detach the calling
 * thread from Lucene4c.
 *
 * Cleanup is always carried out, even when closing the writer fails.
 *
 * @param ctx  Indexer context created by mbox_indexer_init().
 * @return APR_SUCCESS, APR_EGENERAL if the writer reported an error while
 *         closing, or APR_ENOTIMPL when built with NO_MBOX_SEARCH.
 */
apr_status_t mbox_indexer_close(mbox_indexer_t* ctx)
{
#ifdef NO_MBOX_SEARCH
    return APR_ENOTIMPL;
#else
    apr_status_t rv = APR_SUCCESS;

    /* The writer is NULL if a periodic reopen in mbox_indexer_add()
     * failed after the old writer had already been closed. */
    if (ctx->writer) {
        lcn_error_t *err = lcn_index_writer_close(ctx->writer);
        if (err) {
            /* Report before the pool is cleared below. */
            fprintf (stderr, "error closing index: %s\n", err->message);
            rv = APR_EGENERAL;
        }
        ctx->writer = NULL;
    }
    /* lcn_index_searcher_close(ctx->searcher); */

    apr_pool_clear(ctx->pool);

    lcn_thread_detach();

    return rv;
#endif
}

#ifndef NO_MBOX_SEARCH
/* Number of successfully added documents between two optimize/reopen
 * cycles of the index writer. */
enum { MBOX_INDEX_REOPEN_INTERVAL = 50 };

/* Documents added so far by mbox_indexer_add().  File-wide, i.e. shared by
 * every indexer context.  Unsigned so that wrap-around is well defined. */
static unsigned int doc_count = 0;

/**
 * Create a text field @a key = @a value and attach it to @a doc.
 *
 * Failures are reported on stderr and otherwise ignored; a field that
 * could not be created is not added to the document.
 *
 * @param p      Pool passed to lcn_field_text() for the field.
 * @param doc    Document receiving the field.
 * @param key    Field name.
 * @param value  Field content.
 */
static void add_field(apr_pool_t* p, lcn_document_t *doc,
                      const char* key, const char* value)
{
    lcn_error_t *err;
    lcn_field_t *field;

    err = lcn_field_text(&field, key, value, p);
    if (err) {
        fprintf (stderr, "error creating field: %s\n", err->message);
        /* 'field' was not set up; do not hand it to the document. */
        return;
    }

    err = lcn_document_add_field(doc, field);
    if (err) {
        fprintf (stderr, "error adding field: %s\n", err->message);
    }
}
#endif

/**
 * Add one message to the index.
 *
 * A document is built from the msgid, list, domain, date, from and subject
 * members of @a doci that are set, and handed to the index writer.  After
 * every MBOX_INDEX_REOPEN_INTERVAL successful additions the writer is
 * optimized, closed and re-created on ctx->path.  ctx->tpool is cleared
 * before returning, on success and on failure alike.
 *
 * @param ctx   Indexer context created by mbox_indexer_init().
 * @param doci  Message data to index.
 * @return APR_SUCCESS, APR_EGENERAL on any Lucene4c error or when the
 *         context has no open writer, or APR_ENOTIMPL when built with
 *         NO_MBOX_SEARCH.
 */
apr_status_t mbox_indexer_add(mbox_indexer_t* ctx, mbox_search_doc_t* doci)
{
#ifdef NO_MBOX_SEARCH
    return APR_ENOTIMPL;
#else
    lcn_error_t *err;
    lcn_document_t *doc;
    apr_status_t rv = APR_EGENERAL;

    /* A previous reopen may have failed and left the context without a
     * writer; refuse to index rather than use a closed writer. */
    if (!ctx->writer) {
        return APR_EGENERAL;
    }

    err = lcn_document_create (&doc, ctx->tpool);
    if (err) {
        fprintf (stderr, "error creating document :(\n");
        goto done;
    }

    if (doci->msgid) {
        add_field(ctx->tpool, doc, "msgid", doci->msgid);
    }

    if (doci->list) {
        add_field(ctx->tpool, doc, "list", doci->list);
    }

    if (doci->domain) {
        add_field(ctx->tpool, doc, "domain", doci->domain);
    }

    if (doci->date) {
        add_field(ctx->tpool, doc, "date", doci->date);
    }

    if (doci->from) {
        add_field(ctx->tpool, doc, "from", doci->from);
    }

    if (doci->subject) {
        add_field(ctx->tpool, doc, "subject", doci->subject);
    }

    err = lcn_index_writer_add_document (ctx->writer, doc, ctx->tpool);
    if (err) {
        fprintf (stderr, "error adding document to index: %s\n",
                 err->message);
        goto done;
    }

    doc_count++;

    if ((doc_count % MBOX_INDEX_REOPEN_INTERVAL) == 0) {
        err = lcn_index_writer_optimize(ctx->writer);
        if (err) {
            fprintf (stderr, "error optimizing index: %s\n", err->message);
            goto done;
        }

        err = lcn_index_writer_close(ctx->writer);
        if (err) {
            fprintf (stderr, "error closing index: %s\n", err->message);
            goto done;
        }
        /* The old writer is gone; never leave a stale pointer behind. */
        ctx->writer = NULL;

        err = lcn_index_writer_create(&ctx->writer, ctx->path,
                                      ctx->analyzer, ctx->pool);
        if (err) {
            fprintf (stderr, "error opening index: %s\n", err->message);
            ctx->writer = NULL;
            goto done;
        }
    }

    rv = APR_SUCCESS;

done:
    /* Per-document scratch memory is released on every exit path. */
    apr_pool_clear(ctx->tpool);
    return rv;
#endif
}

/**
 * Allocate a searcher context.  Searching is not implemented yet, so the
 * context is only allocated (zero-filled) and @a path is not used.
 *
 * @param ctx_  Receives the new context.
 * @param path  Location of the index (currently unused).
 * @param pool  Pool the context is allocated from.
 * @return APR_SUCCESS.
 */
apr_status_t mbox_search_init(mbox_searcher_t** ctx_, const char* path,
                              apr_pool_t* pool)
{
    mbox_searcher_t *ctx;

    (void) path;

    ctx = apr_pcalloc(pool, sizeof(*ctx));

    *ctx_ = ctx;

    return APR_SUCCESS;
}

/**
 * Run a query against the index.  Not implemented: no search is performed,
 * @a cb is never invoked and APR_SUCCESS is returned.
 *
 * @param ctx    Searcher context (unused).
 * @param qt     Query terms (unused).
 * @param cb     Callback intended to receive each hit (unused).
 * @param baton  Opaque pointer intended for @a cb (unused).
 * @return APR_SUCCESS.
 */
apr_status_t mbox_search_query_do(mbox_searcher_t* ctx,
                                  mbox_search_query_t* qt,
                                  query_callback_fn_t* cb, void* baton)
{
    /* Disabled C++-style sketch of the intended behaviour, kept for
     * reference:

    IndexSearcher s(*ctx->reader);
    mbox_search_doc_t rdoc;
    BooleanQuery* query = new BooleanQuery();
    int i;

    if (qt->msgid) {
        Query& q = QueryParser::Parse(qt->msgid, "msgid", *ctx->an);
        query->add(q, true, false, false);
    }

    if (qt->list) {
        Query& q = QueryParser::Parse(qt->list, "list", *ctx->an);
        query->add(q, true, false, false);
    }

    if (qt->domain) {
        Query& q = QueryParser::Parse(qt->domain, "domain", *ctx->an);
        query->add(q, true, false, false);
    }

    if (qt->from) {
        Query& q = QueryParser::Parse(qt->from, "from", *ctx->an);
        query->add(q, true, false, false);
    }

    if (qt->subject) {
        Query& q = QueryParser::Parse(qt->subject, "subject", *ctx->an);
        query->add(q, true, false, false);
    }

    if (qt->terms) {
        Query& q = QueryParser::Parse(qt->terms, "content", *ctx->an);
        query->add(q, true, false, false);
    }

    Hits& h = s.search(*query);

    for (i=0; i < h.Length(); i++){
        Document& doc = h.doc(i);
        rdoc.msgid = doc.get("msgid");
        rdoc.list = doc.get("list");
        rdoc.domain = doc.get("domain");
        memcpy(rdoc.date, doc.get("date"), sizeof(rdoc.date));
        rdoc.from = doc.get("from");
        rdoc.subject = doc.get("subject");
        rdoc.score = h.score(i);
        if (cb(baton, i, &rdoc) != 0) {
            break;
        }
    }

    delete query;
    s.close();
    */

    (void) ctx;
    (void) qt;
    (void) cb;
    (void) baton;

    return APR_SUCCESS;
}

/**
 * Release a searcher context.  Nothing to do yet, since
 * mbox_search_init() only allocates from the caller's pool.
 *
 * @param ctx  Searcher context (unused).
 * @return APR_SUCCESS.
 */
apr_status_t mbox_search_close(mbox_searcher_t* ctx)
{
    (void) ctx;

    return APR_SUCCESS;
}