Solutions des exercices du TP 2

3.1.1.

 /**
  * Builds an on-disk Lucene index for the documents named in a list file.
  *
  * @param docPath       directory holding the list file and the per-document text files
  * @param inputListFile name of the list file; it is opened as docPath + "/" + inputListFile
  * @param indexPath     directory in which the index is written
  * @throws Throwable if reading the input files or writing the index fails
  */
 public SimpleLuceneIndex(String docPath, String inputListFile, String indexPath) throws Throwable {
     // Reset the member; setupDocIds() assigns the actual list below.
     docIdList = null;

     // Fill the list of ids of the documents to index.
     setupDocIds(docPath + "/" + inputListFile);

     // Object giving read/write access to the index on disk.
     FSDirectory indexDir = FSDirectory.open(new File(indexPath));

     // Object that performs the actual writing of the index.
     indexWriter = new IndexWriter(indexDir, new StandardAnalyzer(), true);
     try {
         // Main work: read every listed document and add it to the index.
         gatherAndIndexDocs(docPath);

         // Optimize the index to allow more efficient searching.
         indexWriter.optimize();
     } finally {
         // Finalize the index and write it to disk. Done in a finally block so
         // the writer is also released when indexing fails part-way through.
         indexWriter.close();
     }
 }

3.1.2

 
   /**
    * Reads a whole text file into a single string.
    *
    * Lines are joined with the platform line separator; no separator is added
    * after the last line. No charset is passed to the Scanner, so the file is
    * decoded with the platform default charset.
    *
    * @param fullPathInputTextFile path of the file to read
    * @return the file content, or an empty string when the file contains no line
    * @throws Throwable if the file cannot be opened or an I/O error occurs while reading
    */
   private String readTextFile(String fullPathInputTextFile) throws Throwable {
       Scanner scanner = new Scanner(new FileInputStream(fullPathInputTextFile));
       try {
           StringBuilder text = new StringBuilder();
           String        NL   = System.getProperty("line.separator");

           // The first line is appended bare; every later line is preceded by
           // the separator, so the result never ends with a trailing separator.
           if (scanner.hasNextLine()) {
               text.append(scanner.nextLine());
           }
           while (scanner.hasNextLine()) {
               text.append(NL).append(scanner.nextLine());
           }

           // Scanner swallows read errors and simply reports "no more input".
           // Surface such an error instead of returning silently truncated text.
           java.io.IOException readError = scanner.ioException();
           if (readError != null) {
               throw readError;
           }
           return text.toString();
       } finally {
           // Closing the scanner also closes the underlying file stream.
           scanner.close();
       }
   }

3.1.3

 
    /**
     * Reads the list file and stores the document ids it names in docIdList.
     *
     * Each line is split on runs of whitespace and its second token (index 1)
     * is taken as a document id. Lines with fewer than two tokens are skipped.
     * Progress messages are printed to standard output.
     *
     * @param fullPathInputListFile path of the list file
     * @throws Throwable if the file cannot be opened or an I/O error occurs while reading
     */
    private void setupDocIds(String fullPathInputListFile) throws Throwable {
        docIdList = new ArrayList<String>();
        System.out.println("setupListOfDocIds(): Attempting to open " + fullPathInputListFile);
        Scanner scanner = new Scanner(new FileInputStream(fullPathInputListFile));
        try {
            while (scanner.hasNextLine()) {
                String[] lineToken = scanner.nextLine().split("\\s+");
                if (lineToken.length > 1) {
                    docIdList.add(lineToken[1]);
                    System.out.println("setupListOfDocIds(): Adding id " + lineToken[1]);
                }
            }

            // Scanner swallows read errors and simply reports "no more input".
            // Surface such an error instead of indexing a silently truncated list.
            java.io.IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
        } finally {
            // Closing the scanner also closes the underlying file stream.
            scanner.close();
        }
    }

3.1.4

 
    /**
     * Adds one Lucene document to the index for every id in docIdList.
     *
     * For an id X, the files download_X.txt, url_X.txt, title_X.txt,
     * outlinks_X.txt and rankscore_X.txt are read from the given directory,
     * in that order, and turned into the fields of the document.
     *
     * @param path directory containing the per-document text files
     * @throws Throwable if a file cannot be read or the document cannot be added
     */
    private void gatherAndIndexDocs(String path) throws Throwable {
        for (String id : docIdList) {
            String suffix      = id + ".txt";
            String contentFile = path + "/download_" + suffix;
            System.out.println("gatherAndIndexDocs(): Processing doc " + contentFile);

            // Read every input file of this document before building any field.
            String content   = readTextFile(contentFile);
            String url       = readTextFile(path + "/url_"       + suffix);
            String title     = readTextFile(path + "/title_"     + suffix);
            String outlinks  = readTextFile(path + "/outlinks_"  + suffix);
            String rankScore = readTextFile(path + "/rankscore_" + suffix);

            Field[] fields = {
                // Body text: analyzed with term vectors, but not stored.
                new Field("content", content, Field.Store.NO,
                          Field.Index.ANALYZED, Field.TermVector.YES),
                // Stored only, not indexed.
                new Field("url",      url,      Field.Store.YES, Field.Index.NO),
                new Field("id",       id,       Field.Store.YES, Field.Index.NO),
                new Field("title",    title,    Field.Store.YES, Field.Index.NO),
                new Field("outlinks", outlinks, Field.Store.YES, Field.Index.NO),
                // Stored and indexed as-is, without analysis.
                new Field("rankscore", rankScore, Field.Store.YES,
                          Field.Index.NOT_ANALYZED)
            };

            Document entry = new Document();
            for (Field field : fields) {
                entry.add(field);
            }
            indexWriter.addDocument(entry);
        }
    }

3.2.

    /**
     * Runs a query against an on-disk Lucene index and prints the best hits.
     *
     * Prints "Found N hits." followed by one line per hit holding its rank
     * (starting at 1), the stored url field, the score and the stored title field.
     *
     * @param indexPath     directory containing the index
     * @param queryString   query text, parsed against searchFieldName with a StandardAnalyzer
     * @param maxHitsString maximum number of hits to collect, as a decimal integer
     * @throws Throwable if maxHitsString is not an integer, the query cannot be
     *                   parsed, or the index cannot be read
     */
    public SimpleLuceneSearcher(String indexPath,     String queryString,
                                String maxHitsString) throws Throwable {
        int           maxHits       = Integer.parseInt(maxHitsString);
        IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.open(new File(indexPath)));
        try {
            TopScoreDocCollector collector = TopScoreDocCollector.create(maxHits, true);
            Query luceneQuery =
                new QueryParser(searchFieldName, new StandardAnalyzer()).parse(queryString);
            indexSearcher.search(luceneQuery, collector);

            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println("Found " + hits.length + " hits.");
            for (int kHit = 0; kHit < hits.length; ++kHit) {
                int      luceneDocId  = hits[kHit].doc;
                String   luceneScore  = String.valueOf(hits[kHit].score);
                // Fetch the stored fields of the hit to display them.
                Document luceneDoc    = indexSearcher.doc(luceneDocId);
                String   docTitle     = luceneDoc.get(docTitleFieldName);
                String   displayEntry = String.valueOf(kHit + 1) + ". " +
                    luceneDoc.get(docUrlFieldName) + " " +
                    luceneScore + " " + docTitle;
                System.out.println(displayEntry);
            }
        } finally {
            // Release the searcher even when parsing or searching fails.
            indexSearcher.close();
        }
    }