How can I sort in a circle in Lucene?

I want to write a ComparatorSource for Lucene 3.6 with which I can execute the Round Robin sorting style:

Say we have something like this:

A, A, A, B, C, D, D

And I want them to be sorted in that order (not necessarily in alphabetical order)

A, B, C, D, A, D, A

How can I perform this behavior?

This is what I came up with, but it does not work, because temporary renters are updated in the right place. Where can i do this?

public class RoundRobinFieldComparatorSource<T> extends FieldComparatorSource { private static long seed = new Random().nextInt(); public static void setNewSeed(long seed) { RoundRobinFieldComparatorSource.seed = seed; } private static final long serialVersionUID = -8959374194451783596L; private final Set<T> roundRobinValues; private final boolean excluded; private final TwoWayStringBridge stringBridge; private final Set<String> preferredValues; public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge, Set<T> roundRobinValues) { this(stringBridge, roundRobinValues, new HashSet<T>()); } /** * same as * {@link #RoundRobinFieldComparatorSource(TwoWayStringBridge, Set, boolean)} * but with excluded set to false */ public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge, Set<T> roundRobinValues, Set<T> preferredValues) { this(stringBridge, roundRobinValues, preferredValues, false); } /** * @param excluded * determines whether the passed roundRobinValues are to be * excluded in scheduling */ public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge, Set<T> roundRobinValues, Set<T> preferredValues, boolean excluded) { this.stringBridge = stringBridge; this.roundRobinValues = roundRobinValues; this.excluded = excluded; this.preferredValues = new HashSet<>(); if(preferredValues != null) { for(T val : preferredValues) { this.preferredValues.add(this.stringBridge.objectToString(val)); } } } public static <T> Set<T> toSet(Collection<T> iterable) { Set<T> ret = new HashSet<>(); ret.addAll(iterable); return ret; } @Override public FieldComparator<String> newComparator(final String fieldName, int numHits, int sortPos, final boolean reversed) throws IOException { return new FieldComparator<String>() { private String[] values; private String bottom; private String[] currentReaderValues; private Map<String, Integer> passedTimeShards; private Random random; @Override public int compare(int slot1, int slot2) { return this.compare(this.values[slot1], this.values[slot2]); } @Override public int compareBottom(int doc) throws IOException { return this.compare(this.bottom, this.currentReaderValues[doc]); } @Override public void copy(int slot, int doc) throws IOException { this.values[slot] = this.currentReaderValues[doc]; } @Override public void setBottom(int slot) { this.bottom = this.values[slot]; } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { // TODO: maybe do a copy? this.currentReaderValues = FieldCache.DEFAULT.getStrings( reader, fieldName); this.values = new String[reader.maxDoc()]; this.passedTimeShards = new HashMap<>(); this.random = new Random(seed); // initialize our timeshards // and make sure the values that have not been passed // and were returned have their timeshard equal to // Integer.MAX_VALUE so they won't be "scheduled" // for a spot in the result list for (String string : this.currentReaderValues) { @SuppressWarnings("unchecked") T object = (T) RoundRobinFieldComparatorSource.this.stringBridge .stringToObject(string); boolean scheduled = RoundRobinFieldComparatorSource.this.roundRobinValues .contains(object); if (RoundRobinFieldComparatorSource.this.excluded) { scheduled = !scheduled; } int curTimeShard = 0; if (!scheduled) { curTimeShard = Integer.MAX_VALUE; } this.passedTimeShards.put(string, curTimeShard); } } @Override public String value(int slot) { return this.values[slot]; } private int compare(String first, String second) { if (first == null && second == null) { return 0; } else if (first == null) { return 1; } else if (second == null) { return -1; } Integer firstTimeShard = this.passedTimeShards.get(first); Integer secondTimeShard = this.passedTimeShards.get(second); int result = Integer.compare(firstTimeShard, secondTimeShard); if (result == 0) { // now check for preferred values (these are the first ones // to be "scheduled" in each shard) if (RoundRobinFieldComparatorSource.this.preferredValues .contains(first) && RoundRobinFieldComparatorSource.this.preferredValues .contains(second)) { // coin flip if equal if (this.random.nextBoolean()) { result = 1; } else { result = -1; } } else if (RoundRobinFieldComparatorSource.this.preferredValues .contains(first)) { result = -1; } else if (RoundRobinFieldComparatorSource.this.preferredValues .contains(second)) { result = 1; } else { // coin flip if equal if (this.random.nextBoolean()) { result = 1; } else { result = -1; } } } if (reversed) { result *= -1; } // and add one timeshard to the "winner" of this round if (result < 0) { firstTimeShard += 1; this.passedTimeShards.put(first, firstTimeShard); } else { secondTimeShard += 1; this.passedTimeShards.put(second, secondTimeShard); } return result; } }; } } 

Thanks in advance.

0
source share
1 answer
 package de.indv.rentino.mk3.hibernate.lucene.search.sort; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; import org.hibernate.search.bridge.TwoWayStringBridge; /** * Round Robin Sorting for Lucene * * @author Martin Braun */ public class RoundRobinFieldComparatorSource<T> extends FieldComparatorSource { private static long seed = new Random().nextInt(); public static void setNewSeed(long seed) { RoundRobinFieldComparatorSource.seed = seed; } private static final long serialVersionUID = -8959374194451783596L; private final Set<T> roundRobinValues; private final boolean excluded; private final TwoWayStringBridge stringBridge; private final Set<String> preferredValues; public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge, Set<T> roundRobinValues) { this(stringBridge, roundRobinValues, new HashSet<T>()); } /** * same as * {@link #RoundRobinFieldComparatorSource(TwoWayStringBridge, Set, boolean)} * but with excluded set to false */ public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge, Set<T> roundRobinValues, Set<T> preferredValues) { this(stringBridge, roundRobinValues, preferredValues, false); } /** * @param excluded * determines whether the passed roundRobinValues are to be * excluded in scheduling */ public RoundRobinFieldComparatorSource(TwoWayStringBridge stringBridge, Set<T> roundRobinValues, Set<T> preferredValues, boolean excluded) { this.stringBridge = stringBridge; this.roundRobinValues = roundRobinValues; this.excluded = excluded; this.preferredValues = new HashSet<>(); if (preferredValues != null) { for (T val : preferredValues) { this.preferredValues.add(this.stringBridge.objectToString(val)); } } } public static <T> Set<T> toSet(Collection<T> iterable) { Set<T> ret = new HashSet<>(); ret.addAll(iterable); return ret; } @Override public FieldComparator<Integer> newComparator(final String fieldName, int numHits, int sortPos, final boolean reversed) throws IOException { return new FieldComparator<Integer>() { private Integer[] values; private Integer[] slotsForDocs; private Integer bottom; @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { final Random random = new Random( RoundRobinFieldComparatorSource.seed); // TODO: maybe do a copy? String[] strings = FieldCache.DEFAULT.getStrings(reader, fieldName); this.values = new Integer[reader.maxDoc()]; Map<String, List<Integer>> scheduledOccurencesPerString = new HashMap<>(); Map<String, List<Integer>> unscheduledOccurencesPerString = new HashMap<>(); List<String> order = new ArrayList<>(); List<String> excludedOrder = new ArrayList<>(); { { Set<String> alreadyAdded = new HashSet<>(); for (int i = 0; i < strings.length; ++i) { String string = strings[i]; this.values[i] = Integer.MAX_VALUE; if (string != null) { @SuppressWarnings("unchecked") T object = (T) RoundRobinFieldComparatorSource.this.stringBridge .stringToObject(string); boolean scheduled = RoundRobinFieldComparatorSource.this.roundRobinValues .contains(object); if (RoundRobinFieldComparatorSource.this.excluded) { scheduled = !scheduled; } if (scheduled) { if (!alreadyAdded.contains(string)) { alreadyAdded.add(string); order.add(string); } List<Integer> occurences = scheduledOccurencesPerString .get(string); if (occurences == null) { occurences = new ArrayList<>(); scheduledOccurencesPerString.put( string, occurences); } occurences.add(i); } else { if (!alreadyAdded.contains(string)) { alreadyAdded.add(string); excludedOrder.add(string); } List<Integer> occurences = unscheduledOccurencesPerString .get(string); if (occurences == null) { occurences = new ArrayList<>(); unscheduledOccurencesPerString.put( string, occurences); } occurences.add(i); } } } } sortByPreference( order, random, RoundRobinFieldComparatorSource.this.preferredValues); sortByPreference(excludedOrder, random, new HashSet<String>()); } int sortIndex = 0; // schedule the included ones sortIndex = schedule(sortIndex, order, scheduledOccurencesPerString, values); // schedule the excluded ones sortIndex = schedule(sortIndex, excludedOrder, unscheduledOccurencesPerString, values); // slotsForDocs equals the values before the actual sorting has // started // in the lucene methods so we can just copy the values array this.slotsForDocs = new Integer[values.length]; System.arraycopy(this.values, 0, this.slotsForDocs, 0, this.values.length); } @Override public int compare(int slot1, int slot2) { return Integer.compare(this.values[slot1], this.values[slot2]); } @Override public int compareBottom(int doc) throws IOException { return Integer.compare(this.bottom, this.slotsForDocs[doc]); } @Override public void setBottom(int slot) { this.bottom = this.values[slot]; } @Override public void copy(int slot, int doc) throws IOException { this.values[slot] = this.slotsForDocs[doc]; } @Override public Integer value(int slot) { return this.values[slot]; } }; } private static void sortByPreference(List<String> list, final Random random, final Set<String> preferredValues) { //and shuffle before we do anything dumb Collections.shuffle(list, random); Collections.sort(list, new Comparator<String>() { @Override public int compare(String first, String second) { int result; if (preferredValues.contains(first) && preferredValues.contains(second)) { result = 0; } else if (preferredValues.contains(first)) { result = -1; } else if (preferredValues.contains(second)) { result = 1; } else { result = 0; } return result; } }); } private static int schedule(int sortIndex, List<String> order, Map<String, List<Integer>> positions, Integer[] values) { while (positions.size() > 0) { // repeatedly iterate over the order // and put the index for the next element // with the same string if possible for (int i = 0; i < order.size(); ++i) { String cur = order.get(i); if (positions.get(cur) != null) { // get an occurence of the string Integer pos = positions.get(cur).remove(0); // and put the sort index into the values array values[pos] = sortIndex++; if (positions.get(cur).size() == 0) { positions.remove(cur); } } } } return sortIndex; } } 

This is my decision.

0
source

Source: https://habr.com/ru/post/979304/


All Articles