我正在尝试根据在这里学到的内容对PDF进行索引和搜索:ElasticSearch和附件类型(NEST C#)
但是索引时出现了“状态码400,未提供内容(No content is provided)”的错误。该PDF大小约为7KB,差不多是我能制作出的最小文件,其中只有少量文字。
有什么建议吗?代码和输出如下。谢谢!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nest;
using System.IO;
using System.Threading;
namespace SearchPDFConsole
{
/// <summary>
/// Console program that recreates an Elasticsearch index, indexes two
/// <see cref="Document"/> instances (one of them carrying a base64-encoded PDF
/// in its <c>File</c> property) and then runs a query-string search.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the delete / create / index / search sequence and prints the
    /// outcome of each step to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // create es client
        string index = "pdftestitems";
        Console.WriteLine("client stuff...");
        var node = new Uri("http://tns-dev.pts-eden.org:9200");
        var settings = new ConnectionSettings(node).SetDefaultIndex(index);
        var client = new ElasticClient(settings);

        Console.WriteLine("deleting index...");
        // delete index if any
        var di = client.DeleteIndex(index);
        Console.WriteLine(di.ConnectionStatus.HttpStatusCode);

        Console.WriteLine("creating index...");
        // Create your index explicitly before you index any instances of your class.
        // If you don't do this, it will use dynamic mapping and ignore your attribute
        // mapping. If you change your mapping in the future, always recreate the index.
        var ci = client.CreateIndex(index, c => c.AddMapping<Document>(m => m.MapFromAttributes()));
        // Print the status of the create-index call itself. The previous code printed
        // the delete-index response here, which hid any failure to create the mapping.
        Console.WriteLine(ci.ConnectionStatus.HttpStatusCode);

        // The attachment carries the raw PDF bytes as a base64 string.
        string path = "test2.pdf";
        var attachment = new Attachment
        {
            Content = Convert.ToBase64String(File.ReadAllBytes(path)),
            ContentType = "application/pdf",
            Name = Path.GetFileName(path)
        };

        // Plain document without an attachment.
        var doc = new Document()
        {
            ID = 2,
            Title = "test2",
            Content = "This is a test."
        };

        // Document that includes the PDF attachment.
        var doc2 = new Document()
        {
            ID = 1,
            Title = "test",
            Content = "good luck",
            File = attachment
        };

        // NOTE: the "document 1" / "document 2" labels follow the order of the index
        // calls, not the ID values (the first call indexes ID 2, the second ID 1).
        Console.WriteLine("Indexing document 1...");
        var status = client.Index<Document>(doc);
        Console.WriteLine(status.ConnectionStatus);

        Console.WriteLine("Indexing document 2...");
        var status2 = client.Index<Document>(doc2);
        Console.WriteLine(status2.ConnectionStatus);

        // Pause before searching; presumably this gives the index time to make the
        // new documents searchable - TODO confirm against the cluster's refresh settings.
        Console.WriteLine("sleeping 1s...");
        Thread.Sleep(1000);

        string stringsearch = "test";
        //var searchResults = client.Search<Document>(s => s.Type("document").Query(qs => qs.QueryString(q => q.Query(stringsearch))));
        var searchResults = client.Search<Document>(s => s.Query(qs => qs.QueryString(q => q.Query(stringsearch))));

        // Print the number of hits followed by the title of each hit.
        Console.WriteLine(searchResults.Documents.Count());
        foreach (var sd in searchResults.Documents)
        {
            Console.WriteLine(sd.Title);
        }
    }
}
/// <summary>
/// Document model that is indexed into and searched from Elasticsearch.
/// The <c>ElasticType</c> attribute names its Elasticsearch type "document".
/// </summary>
[ElasticType(Name = "document")]
public class Document
{
/// <summary>Numeric identifier of the document.</summary>
public int ID { get; set; }
/// <summary>Title of the document; mapped with <c>Store = true</c>.</summary>
[ElasticProperty(Store = true)]
public string Title { get; set; }
/// <summary>Plain-text content of the document; mapped with <c>Store = true</c>.</summary>
[ElasticProperty(Store = true)]
public string Content { get; set; }
/// <summary>
/// Optional file payload, mapped as an attachment field with term vectors
/// (positions and offsets) and <c>Store = true</c>.
/// NOTE(review): the attachment field type presumably requires the
/// mapper-attachments plugin on the Elasticsearch node - confirm.
/// </summary>
[ElasticProperty(Type = FieldType.Attachment, TermVector = TermVectorOption.WithPositionsOffsets, Store = true)]
public Attachment File { get; set; }
}
/// <summary>
/// File payload held by <c>Document.File</c>. Each property is given an
/// explicit underscore-prefixed field name through <c>ElasticProperty</c>.
/// </summary>
public class Attachment
{
/// <summary>File content as a string, named "_content"; presumably expected to be base64-encoded - verify against callers.</summary>
[ElasticProperty(Name = "_content")]
public string Content { get; set; }
/// <summary>Content type of the file, named "_content_type".</summary>
[ElasticProperty(Name = "_content_type")]
public string ContentType { get; set; }
/// <summary>Name of the file, named "_name".</summary>
[ElasticProperty(Name = "_name")]
public string Name { get; set; }
}
}
我的程序的输出:
C:\PROGRAMMING\SearchPDFTest\SearchPDFConsole\bin\Debug>SearchPDFConsole.exe
client stuff...
deleting index...
200
creating index...
200
Indexing document 1...
StatusCode: 201,
Method: PUT,
Url: http://tns-dev.pts-eden.org:9200/pdftestitems/document/2,
Request: {
"iD": 2,
"title": "test2",
"content": "This is a test."
},
Response: <Response stream not captured or already read to completion by
serializer, set ExposeRawResponse() on connectionsettings to force it to be set
on>
Indexing document 2...
StatusCode: 400,
Method: PUT,
Url: http://tns-dev.pts-eden.org:9200/pdftestitems/document/1,
Request: {
"iD": 1,
"title": "test",
"content": "good luck",
"file": {
"_content": "JVBERi0xLjYNJeLjz9MNCjE5IDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9
GaXJzdCA5L0xlbmd0aCAxMzkvTiAyL1R5cGUvT2JqU3RtPj5zdHJlYW0NCmjeTM3BCsIwDAbgV/mfwLQ
brQijB3cUYQxvY4figuzSjrYDfXvbenCH/JCEL2khoKAVuo56v7sESbd1iZPKi3GmOy+rvfr3JE4CpbR
scp4vOWd6fDamwb44GlMPuMQuRcimaOqD36pFdUUdLx6ngw1Zoq1u5Oj38OQI/et9sokh/v+M+QowAEh
[LOTS MORE BASE64 ENCODED STUFF]
mL1dbMSAyIDFdPj5zdHJlYW0NCmjeYmIAAiZGpmUMTAwMPkDWP0UGpv/8a4Gsj8GMQDHG/yACxGIAsRi
YIaz/Aun/gKwaoDamDJDeqSBWIZBgfAoiZoAIRyDx8g1I9iWIuwpISGkyAAQYAArYEhcNCmVuZHN0cmV
hbQ1lbmRvYmoNc3RhcnR4cmVmDQo3MTE2DQolJUVPRg0K",
"_content_type": "application/pdf",
"_name": "test2.pdf"
}
},
Response: <Response stream not captured or already read to completion by
serializer, set ExposeRawResponse() on connectionsettings to force it to be set
on>
ExceptionMessage: No content is provided.
StackTrace:
sleeping 1s...
1
test2
我用在Word中创建的简单PDF尝试了您的代码,对我来说似乎很好用。
我正在运行带有Elasticsearch-mapper-attachments 2.4.3的ES 1.4.4。这两个索引操作都返回正确的2xx状态代码,之后我就可以搜索PDF。
您能确认该PDF文件本身没有问题吗?可以尝试用另一个程序重新生成一个新的PDF再试一次。
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句