#!/usr/bin/env ruby # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # TODO: distill common shell script needs into common file for parsing parameters for Solr URL, input filename, -debug, etc # script/runner or script/console-like, from Rails. A data mapper would be a great generalizable piece. require 'solr' solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr" dl_filename = ARGV[0] debug = ARGV[1] == "-debug" if dl_filename == nil puts "You must pass a filename as an option." exit end source = Solr::Importer::DelimitedFileSource.new(dl_filename) # Exported column names # medium,associatedURL,boxHeightInInches,boxLengthInInches,boxWeightInPounds,boxWidthInInches, # scannednumber,upc,asin,country,title,fullTitle,series,numberInSeries,edition,aspect,mediacount, # genre,price,currentValue,language,netrating,description,owner,publisher,published,rare,purchaseDate,rating, # used,signed,hasExperienced,notes,location,paid,condition,notowned,author,illustrator,pages mapping = { :id => Proc.new {|data| data[:upc].empty? ? data[:asin] : data[:upc]}, :medium_facet => :medium, :country_facet => :country, :signed_facet => :signed, :rating_facet => :netrating, :language_facet => :language, :genre_facet => Proc.new {|data| data[:genre].split('/').map {|s| s.strip}}, :title_text => :title, :full_title_text => :fullTitle, :asin_display => :asin, :notes_text => :notes, :publisher_facet => :publisher, :description_text => :description, :author_text => :author, :pages_text => :pages, :published_year_facet => Proc.new {|data| data[:published].scan(/\d\d\d\d/)[0]} } indexer = Solr::Indexer.new(source, mapping, :debug => debug) indexer.index do |record, solr_document| # can modify solr_document before it is indexed here end indexer.solr.commit unless debug indexer.solr.optimize unless debug