当前,许多应用重度依赖于搜索功能。从在电子商务网站中寻找合适的产品,到在社交网络中搜索联系人,再到在地图网站中查找POI(兴趣点)和地址,依赖于搜索的应用非常广泛。
亚马逊新推出的云搜索服务,为自行实现搜索功能或定制安装Apache Lucene、Apache Solr和elasticsearch等流行产品提供了可行的替代方式。他们这样描述该服务:
数据定义

尽管亚马逊提供了数据上传和搜索响应的数据定义(以XML和JSON两种方式),但数据上传的文档中仅定义了Relax NG模式,而搜索响应则未定义任何模式。在我们的实现方式中,我们决定使用XML数据格式而不是JSON,这是因为XML数据的封装(marshalling)更加简单——XML使用规范的数据格式,而JSON则是动态的(JSON的标签是动态定义的,每个请求各异)。我们分别用下面的两种模式(列表1和列表2)来描述上传数据和搜索结果。

<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            elementFormDefault="qualified">
  <!--
    Schema for the document-upload request (root element "batch") and the
    reply to it (root element "response").
    No targetNamespace is declared, so every component below is in no namespace.
  -->

  <!-- … The listing omits the definitions of the simple types referenced
       below: field_nameType, IDType and versionType. They must be supplied
       before this schema can be compiled. … -->

  <!-- A single field: string content plus a "name" attribute. -->
  <xsd:complexType name="fieldType">
    <xsd:simpleContent>
      <xsd:extension base="xsd:string">
        <xsd:attribute name="name" type="field_nameType" />
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>

  <!-- An "add" operation: one or more fields, with id, version and lang
       attributes (none of them marked as required). -->
  <xsd:complexType name="addType">
    <xsd:sequence>
      <xsd:element name="field" type="fieldType" maxOccurs="unbounded" />
    </xsd:sequence>
    <xsd:attribute name="id" type="IDType" />
    <xsd:attribute name="version" type="versionType" />
    <xsd:attribute name="lang" type="xsd:language" />
  </xsd:complexType>

  <!-- A "delete" operation: no content, only id and version attributes. -->
  <xsd:complexType name="deleteType">
    <xsd:attribute name="id" type="IDType" />
    <xsd:attribute name="version" type="versionType" />
  </xsd:complexType>

  <!-- A batch: zero or more "add" elements followed by zero or more "delete"
       elements. Because this is a sequence, all adds precede all deletes. -->
  <xsd:complexType name="batchType">
    <xsd:sequence>
      <xsd:element name="add" type="addType" minOccurs="0" maxOccurs="unbounded" />
      <xsd:element name="delete" type="deleteType" minOccurs="0" maxOccurs="unbounded" />
    </xsd:sequence>
  </xsd:complexType>

  <!-- Root element of an upload request. -->
  <xsd:element name="batch" type="batchType" />

  <!-- Allowed values of the response "status" attribute. -->
  <xsd:simpleType name="statusType">
    <xsd:restriction base="xsd:string">
      <xsd:enumeration value="success" />
      <xsd:enumeration value="error" />
    </xsd:restriction>
  </xsd:simpleType>

  <!-- Wrapper holding one or more "error" strings. -->
  <xsd:complexType name="errorsType">
    <xsd:sequence>
      <xsd:element name="error" type="xsd:string" maxOccurs="unbounded" />
    </xsd:sequence>
  </xsd:complexType>

  <!-- Wrapper holding one or more "warning" strings. -->
  <xsd:complexType name="warningsType">
    <xsd:sequence>
      <xsd:element name="warning" type="xsd:string" maxOccurs="unbounded" />
    </xsd:sequence>
  </xsd:complexType>

  <!-- Upload response: optional "errors" and "warnings" wrappers (in that
       order) plus the status, adds and deletes attributes. -->
  <xsd:complexType name="responseType">
    <xsd:sequence>
      <xsd:element name="errors" type="errorsType" minOccurs="0" />
      <xsd:element name="warnings" type="warningsType" minOccurs="0" />
    </xsd:sequence>
    <xsd:attribute name="status" type="statusType" />
    <xsd:attribute name="adds" type="xsd:int" />
    <xsd:attribute name="deletes" type="xsd:int" />
  </xsd:complexType>

  <!-- Root element of an upload response. -->
  <xsd:element name="response" type="responseType" />
</xsd:schema>
列表1 上传数据的模式
<xsd:schema
    xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns="http://cloudsearch.amazonaws.com/2011-02-01/results"
    targetNamespace="http://cloudsearch.amazonaws.com/2011-02-01/results"
    elementFormDefault="qualified">
  <!--
    Schema for search replies. It declares two global elements, "results" and
    "error", both in the target namespace; the default namespace is bound to
    the same URI so that type references can be written without a prefix.
  -->

  <!-- One facet constraint: "value" and "count" attributes, no content. -->
  <xsd:complexType name="constraintType">
    <xsd:attribute name="value" type="xsd:string"/>
    <xsd:attribute name="count" type="xsd:int"/>
  </xsd:complexType>

  <!-- A named facet made up of one or more constraints. -->
  <xsd:complexType name="facetType">
    <xsd:sequence>
      <xsd:element name="constraint" type="constraintType" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="name" type="xsd:string"/>
  </xsd:complexType>

  <!-- Wrapper holding one or more "facet" elements. -->
  <xsd:complexType name="facetsType">
    <xsd:sequence>
      <xsd:element name="facet" type="facetType" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>

  <!-- Attribute-only element carrying rid, time-ms and cpu-time-ms. -->
  <xsd:complexType name="infoType">
    <xsd:attribute name="rid" type="xsd:string"/>
    <xsd:attribute name="time-ms" type="xsd:int"/>
    <xsd:attribute name="cpu-time-ms" type="xsd:int"/>
  </xsd:complexType>

  <!-- A "d" entry of a hit: string content plus a "name" attribute. -->
  <xsd:complexType name="dType">
    <xsd:simpleContent>
      <xsd:extension base="xsd:string">
        <xsd:attribute name="name" type="xsd:string"/>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>

  <!-- A single hit: one or more "d" entries and an "id" attribute. -->
  <xsd:complexType name="hitType">
    <xsd:sequence>
      <xsd:element name="d" type="dType" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="id" type="xsd:string"/>
  </xsd:complexType>

  <!-- The hit list: one or more hits, with "found" and "start" attributes. -->
  <xsd:complexType name="hitsType">
    <xsd:sequence>
      <xsd:element name="hit" type="hitType" maxOccurs="unbounded"/>
    </xsd:sequence>
    <xsd:attribute name="found" type="xsd:int"/>
    <xsd:attribute name="start" type="xsd:int"/>
  </xsd:complexType>

  <!-- Successful reply: rank, match-expr, optional hits, optional facets and
       info, in exactly this order. -->
  <xsd:complexType name="resultsType">
    <xsd:sequence>
      <xsd:element name="rank" type="xsd:string"/>
      <xsd:element name="match-expr" type="xsd:string"/>
      <xsd:element name="hits" type="hitsType" minOccurs="0"/>
      <xsd:element name="facets" type="facetsType" minOccurs="0"/>
      <xsd:element name="info" type="infoType"/>
    </xsd:sequence>
  </xsd:complexType>

  <!-- Root element of a successful search reply. -->
  <xsd:element name="results" type="resultsType"/>

  <!-- One message: severity, code and message attributes, no content. -->
  <xsd:complexType name="messageType">
    <xsd:attribute name="severity" type="xsd:string"/>
    <xsd:attribute name="code" type="xsd:string"/>
    <xsd:attribute name="message" type="xsd:string"/>
  </xsd:complexType>

  <!-- Error reply: error, rid, time-ms and cpu-time-ms child elements followed
       by one or more "messages" elements. -->
  <xsd:complexType name="errorType">
    <xsd:sequence>
      <xsd:element name="error" type="xsd:string"/>
      <xsd:element name="rid" type="xsd:string"/>
      <xsd:element name="time-ms" type="xsd:int"/>
      <xsd:element name="cpu-time-ms" type="xsd:int"/>
      <xsd:element name="messages" type="messageType" maxOccurs="unbounded"/>
    </xsd:sequence>
  </xsd:complexType>

  <!-- Root element of an error reply. -->
  <xsd:element name="error" type="errorType"/>
</xsd:schema>
列表2 搜索结果数据的模式
我们使用xjc绑定编译器(binding compiler)为上述两种模式生成Java类,这样就能通过Java Architecture for XML Binding(JAXB)进行自动封装/解封装。