I indexed all the records of a MySQL table (about half a million rows) in Elasticsearch using the JDBC river. At first everything went well, but then the river suddenly started throwing a strange indexing error whenever it tried to index a document.
Error:
[2012-10-10 16:20:11,867][DEBUG][action.bulk] [Man-Wolf] [monitoring][0] failed to bulk item (index) index {[monitoring][mention_reports][70138], source[{"location":null,"wday":6,"monitoring_profile_id":22,"stream_type":"facebook","score":0,"tweet_count":0,"city":null,"to_user_id":0,"sentiment":-1,"post_message":null,"description":null,"location_data_source":null,"video_id":null,"year":2012,"post_name":"Danny Waddell Photos","video_thumb_url":null,"user_follower":0,"tweet_text":null,"country":null,"content":null,"picture":"http://photos-e.ak.fbcdn.net/hphotos-ak-prn1/547714_507439249282865_1395188712_s.jpg","updated_at":"2012-08-04T16:06:59Z","model_id":82884,"month":8,"day":4,"profile_image_url":null,"result_type":null,"from_user_id":1407391780,"stream":null,"geo_coordinates_long":0.0,"tweet_id":0,"link":null,"state":null,"post_id":"1407391780_227741774015103","model_name":"FacebookSearchResult","id":70138,"title":null,"sub_location_1":null,"user_friends":0,"sub_location_2":null,"from_user":"Stephanie Morris","post_type":"photo","creation_time":"2012-08-04T16:05:20Z","created_at":"2012-08-04T16:06:59Z","to_user":null,"user_pic":"https://graph.facebook.com/1407391780/picture","video_type":"youtube","is_active":1,"caption":"obama will let them die for america but not vote","hour":16,"country_code":null,"duration":0,"positive_keyword":"obama","geo_coordinates_lat":0.0}]} org.elasticsearch.index.engine.IndexFailedEngineException: [monitoring][0] Index failed for [mention_reports#70138] at org.elasticsearch.index.engine.robin.RobinEngine.index(RobinEngine.java:482) at org.elasticsearch.index.shard.service.InternalIndexShard.index(InternalIndexShard.java:323) at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:158) at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction.performOnPrimary(TransportShardReplicationOperationAction.java:529) at 
org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction$1.run(TransportShardReplicationOperationAction.java:427) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603) at java.lang.Thread.run(Thread.java:636) Caused by: java.io.FileNotFoundException: /var/www/elasticsearch-0.19.0/data/elasticsearch/nodes/0/indices/monitoring/0/index/_1pr.tvd (Too many open files) at java.io.RandomAccessFile.open(Native Method) at java.io.RandomAccessFile.<init>(RandomAccessFile.java:233) at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:441) at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:306) at org.elasticsearch.index.store.Store$StoreDirectory.createOutput(Store.java:418) at org.elasticsearch.index.store.Store$StoreDirectory.createOutput(Store.java:390) at org.apache.lucene.index.TermVectorsTermsWriter.initTermVectorsWriter(TermVectorsTermsWriter.java:123) at org.apache.lucene.index.TermVectorsTermsWriter.finishDocument(TermVectorsTermsWriter.java:143) at org.apache.lucene.index.TermVectorsTermsWriter$PerDoc.finish(TermVectorsTermsWriter.java:250) at org.apache.lucene.index.DocFieldProcessorPerThread$PerDoc.finish(DocFieldProcessorPerThread.java:348) at org.apache.lucene.index.DocumentsWriter$WaitQueue.writeDocument(DocumentsWriter.java:1404) at org.apache.lucene.index.DocumentsWriter$WaitQueue.add(DocumentsWriter.java:1424) at org.apache.lucene.index.DocumentsWriter.finishDocument(DocumentsWriter.java:1043) at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:772) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:2066) at org.elasticsearch.index.engine.robin.RobinEngine.innerIndex(RobinEngine.java:565) at org.elasticsearch.index.engine.robin.RobinEngine.index(RobinEngine.java:477) ... 7 more
JDBC River:
curl -XPUT 'localhost:9200/_river/river_mention_reports/_meta' -d '{ "type" : "jdbc", "jdbc" : { "driver" : "com.mysql.jdbc.Driver", "url" : "jdbc:mysql://localhost:3306/monitoring", "user" : "USERNAME", "password" : "PASSWORD", "sql" : "select * from mention_reports", "poll" : "2h", "versioning" : false }, "index" : { "index" : "monitoring", "type" : "mention_reports", "bulk_size" : 200, "bulk_timeout" : "60s" } }'
My index settings and mapping:
curl -XPOST http://localhost:9200/monitoring/ -d ' { "settings":{ "index":{ "number_of_shards":5, "number_of_replicas":1 }, "analysis":{ "filter":{ "myCustomShingle":{ "type":"shingle", "max_shingle_size":3, "output_unigrams":true }, "myCustomStop":{ "type":"stop", "stopwords":["a","about","abov ... ] } }, "analyzer":{ "myAnalyzer":{ "type":"custom", "tokenizer":"standard", "filter":[ "lowercase", "myCustomShingle", "stop", "myCustomStop" ] } } } }, "mappings":{ "mention_reports":{ "_source":{ "enabled":true }, "_all":{ "enabled":false }, "index.query.default_field":"post_message", "properties":{ "id":{ "type":"string", "index":"not_analyzed", "include_in_all" : "false", "null_value" : "null" }, "creation_time":{ "type":"date" }, "from_user":{ "type":"string", "analyzer":"standard", "include_in_all":"false", "null_value":0 }, "from_user_id":{ "type":"string", "index":"not_analyzed", "include_in_all":"false", "null_value":"null" }, . . . "title":{ "type":"string", "analyzer":"myAnalyzer", "term_vector":"with_positions_offsets", "null_value" : "null" } } } } } '
How can I fix this "Too many open files" error?