JavaScript代码: 支持豆瓣的zotero的translator
001 {
002 "translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
003 "translatorType":4,
004 "label":"Douban",
005 "creator":"Ace Strong<acestrong@gmail.com>",
006 "target":"^https?://www.douban.com/subject",
007 "minVersion":"1.0.0b6",
008 "maxVersion":"",
009 "priority":100,
010 "inRepository":true,
011 "lastUpdated":"2009-3-2 15:20:00"
012 }
013
/**
 * Tells Zotero what kind of item(s) the current Douban page offers.
 *
 * @param {Document} doc - The page document (not inspected here).
 * @param {string} url - The page URL.
 * @returns {string} "multiple" when the URL contains "subject_search"
 *     (a search result listing), otherwise "book".
 */
function detectWeb(doc, url) {
    // Every other URL accepted by the translator target is handled as a
    // single book page, so there is no "unsupported" outcome here.
    if (/subject_search/.test(url)) {
        return "multiple";
    }
    return "book";
}
026
/**
 * Strips leading and trailing whitespace from a string.
 *
 * @param {string} text
 * @returns {string}
 */
function trimEnds(text) {
    return text.replace(/^\s+|\s+$/g, "");
}

/**
 * Adds authors and translators found inside the "info" element to the item.
 *
 * A creator row is a <span> that itself contains a <span> holding the role
 * label ("作者" = author, "译者" = translator) and an <a> holding the name.
 * Rows without a nested label span or without a link are skipped.
 *
 * @param {Object} item - Zotero item whose `creators` array is appended to.
 * @param {Element} info - The element with id "info".
 */
function addCreators(item, info) {
    var roles = { "作者": "author", "译者": "translator" };
    var rows = info.getElementsByTagName("span");
    for (var i = 0; i < rows.length; i++) {
        var labelSpans = rows[i].getElementsByTagName("span");
        var links = rows[i].getElementsByTagName("a");
        if (labelSpans.length === 0 || links.length === 0) {
            continue;
        }
        var label = Zotero.Utilities.trimInternal(labelSpans[0].textContent);
        if (!Object.prototype.hasOwnProperty.call(roles, label)) {
            continue;
        }
        var name = Zotero.Utilities.trimInternal(links[0].textContent);
        item.creators.push(Zotero.Utilities.cleanAuthor(name, roles[label], true));
    }
}

/**
 * Extracts ISBN, page count, publisher and publication date from the
 * details block.
 *
 * Every <span> in the block is treated as a label. Each label's text is
 * replaced by a "@@@" marker in the block's text, the text is split on the
 * marker, and the n-th non-empty piece is taken as the value of the n-th
 * label.
 *
 * @param {Element} details - The first <div> inside the "info" element.
 * @returns {Object} Map of Zotero field name to value; fields whose label
 *     was not found (or whose value is empty) are omitted.
 */
function readLabelledFields(details) {
    var fieldForLabel = {
        "ISBN:": "ISBN",
        "页数:": "pages",
        "出版社:": "publisher",
        "出版年:": "date"
    };
    var remaining = details.textContent;
    var labels = details.getElementsByTagName("span");
    var positionOf = {};
    for (var i = 0; i < labels.length; i++) {
        var labelText = labels[i].textContent;
        if (Object.prototype.hasOwnProperty.call(fieldForLabel, labelText)) {
            positionOf[fieldForLabel[labelText]] = i;
        }
        // Cut the label out of the text so only the values remain.
        remaining = remaining.replace(labelText, "@@@");
    }

    var values = [];
    var pieces = trimEnds(remaining).split("@@@");
    for (var j = 0; j < pieces.length; j++) {
        // Completely empty pieces (e.g. the text before the first label)
        // are dropped; whitespace-only pieces keep their slot.
        if (pieces[j] !== "") {
            values.push(trimEnds(pieces[j]));
        }
    }

    var fields = {};
    for (var field in positionOf) {
        var value = values[positionOf[field]];
        if (value) {
            fields[field] = value;
        }
    }
    return fields;
}

/**
 * Formats one introduction section: label line, text, trailing newline,
 * with every run of two or more whitespace characters collapsed into a
 * single newline so paragraphs stay separated.
 *
 * @param {string} label - Heading line, e.g. "简介:".
 * @param {string} text - Raw section text.
 * @returns {string}
 */
function formatIntro(label, text) {
    return (label + "\n" + text + "\n").replace(/\s{2,}/g, "\n");
}

/**
 * Builds the abstract from the "related_info" block, pairing the first two
 * <h2> headings with the first two <div> elements.
 *
 * @param {Element} relatedInfo - The div with class "related_info".
 * @returns {?string} Book summary followed by author biography (either may
 *     be empty), or null when the block has no headings at all.
 */
function buildAbstract(relatedInfo) {
    var headings = relatedInfo.getElementsByTagName("h2");
    var sections = relatedInfo.getElementsByTagName("div");
    if (headings.length === 0) {
        return null;
    }

    var bookIntro = "";
    var authorIntro = "";
    if (sections.length > 0) {
        var firstHeading = headings[0].textContent;
        // "作者简介" also contains "简介", so test for the author heading
        // first; otherwise a lone author biography is labelled as summary.
        if (firstHeading.indexOf('作者简介') !== -1) {
            authorIntro = formatIntro("作者简介:", sections[0].textContent);
        } else if (firstHeading.indexOf('简介') !== -1) {
            // Long summaries are split into spans, with the second span
            // used as the summary text.
            var parts = sections[0].getElementsByTagName("span");
            var summary = parts.length > 1 ? parts[1].textContent : sections[0].textContent;
            bookIntro = formatIntro("简介:", summary);
        }
    }
    if (authorIntro === "" && headings.length > 1 && sections.length > 1 &&
            headings[1].textContent.indexOf('作者简介') !== -1) {
        authorIntro = formatIntro("作者简介:", sections[1].textContent);
    }
    return bookIntro + authorIntro;
}

/**
 * Scrapes a single Douban book page into a Zotero "book" item and saves it.
 *
 * Missing page parts (title, info box, summary) are skipped instead of
 * raising, so the item is always completed with whatever could be read.
 *
 * @param {Document} doc - The book page document.
 */
function scrape(doc) {
    // XPath expressions below use no namespace prefix, so no resolver is
    // needed.
    var nsResolver = null;
    var newItem = new Zotero.Item("book");

    // Title: /html/body/div/h1
    var titleTag = doc.evaluate('//html/body/div/h1', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    if (titleTag) {
        newItem.title = Zotero.Utilities.trimInternal(titleTag.textContent);
        Zotero.debug("Title: " + newItem.title);
    }

    // Attach a link to the page and record its URL.
    newItem.attachments.push({url:doc.location.href, snapshot:false, title:doc.title, mimeType:"text/html"});
    newItem.url = doc.location.href;

    // Creators and bibliographic details: //*[@id="info"]
    var info = doc.evaluate('//*[@id="info"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    if (info) {
        addCreators(newItem, info);
        var details = info.getElementsByTagName("div")[0];
        if (details) {
            var fields = readLabelledFields(details);
            for (var field in fields) {
                newItem[field] = fields[field];
            }
        }
    }

    // Summary and author biography: //div[@class="related_info"]
    var relatedInfo = doc.evaluate('//div[@class="related_info"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    if (relatedInfo) {
        var abstractNote = buildAbstract(relatedInfo);
        if (abstractNote !== null) {
            newItem.abstractNote = abstractNote;
        }
    }

    newItem.complete();
}
176
/**
 * Translator entry point: saves the book on the current page, or, on a
 * search result page, lets the user pick results and saves each of them.
 *
 * @param {Document} doc - The current page document.
 * @param {string} url - The current page URL.
 * @returns {(boolean|undefined)} true when there is nothing to process
 *     (no result list on the page, or the user cancelled the selection);
 *     otherwise undefined after scheduling the scrape.
 */
function doWeb(doc, url) {
    var urls;

    if (detectWeb(doc, url) === "multiple") {
        Zotero.debug("Enter multiple~");

        // Search results live inside //*[@id="in_tablem"], one <table> each.
        var resultList = doc.evaluate('//*[@id="in_tablem"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
        if (!resultList) {
            return true;
        }

        // Map of result link -> cleaned title, offered to the user below.
        var items = {};
        var tables = resultList.getElementsByTagName("table");
        Zotero.debug("begin to fetch multiple title and link");
        for (var i = 0; i < tables.length; i++) {
            // The title link sits in the second cell; rows that do not
            // have that shape are skipped instead of aborting the search.
            var cells = tables[i].getElementsByTagName("td");
            if (cells.length < 2) {
                continue;
            }
            var links = cells[1].getElementsByTagName("a");
            if (links.length === 0 || !links[0].href) {
                continue;
            }
            items[links[0].href] = Zotero.Utilities.trimInternal(links[0].textContent);
        }

        // Let the user choose which results to save.
        items = Zotero.selectItems(items);
        if (!items) {
            return true;
        }
        Zotero.debug("go on processing.");

        urls = [];
        for (var selectedUrl in items) {
            urls.push(selectedUrl);
        }
    } else {
        urls = [url];
    }

    Zotero.debug(urls);
    // Load and scrape every chosen page, then signal completion.
    Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
    Zotero.wait();
}