比这篇新的文章: 支持CNKI的Zotero的translator
比这篇旧的文章: 做邮件标题解码的perl代码

支持豆瓣的zotero的translator

语言: JavaScript, 标签: 豆瓣 translator zotero 2009/02/19发布 11个月前更新 更新记录
作者: Ace Strong, 点击1235次, 评论(0), 收藏者(0), 打分:

背景
主题: 字体:
{
    "translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
    "translatorType":4,
    "label":"Douban",
    "creator":"Ace Strong<acestrong@gmail.com>",
    "target":"^https?://www\\.douban\\.com/subject",
    "minVersion":"1.0.0b6",
    "maxVersion":"",
    "priority":100,
    "inRepository":true,
    "lastUpdated":"2009-3-2 15:20:00"
}
013
/**
 * Tells Zotero what kind of item(s) the current Douban page offers.
 *
 * @param {Document} doc - The loaded page (not inspected here).
 * @param {string} url - The page URL.
 * @returns {string} "multiple" when the URL contains "subject_search"
 *     (a search-result listing), otherwise "book".
 */
function detectWeb(doc, url) {
    // Only the URL decides the page type; every non-search URL that reached
    // this translator is treated as a single book page.
    if (/subject_search/.test(url)) {
        return "multiple";
    }
    return "book";
}
026
/**
 * Internal helper for scrape: returns the first node matched by an XPath
 * expression, or null when nothing matches.
 *
 * No namespace resolver is passed; the previous implementation built one but
 * then always overwrote it with null before use.
 */
function doubanFirstNode(doc, xpath) {
    return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
}

/** Internal helper for scrape: strips leading and trailing whitespace. */
function doubanTrim(text) {
    return text.replace(/^\s+|\s+$/g, "");
}

/**
 * Internal helper for scrape: adds authors and translators to the item.
 *
 * Looks at every <span> under `info` that itself contains a nested <span>
 * (the role label) and an <a> (the person's name). Rows without a link are
 * skipped instead of raising a TypeError.
 */
function doubanAddCreators(info, item) {
    var rows = info.getElementsByTagName("span");
    for (var i = 0; i < rows.length; i++) {
        var labelTags = rows[i].getElementsByTagName("span");
        var link = rows[i].getElementsByTagName("a")[0];
        if (labelTags.length === 0 || !link) {
            continue;
        }
        var role = Zotero.Utilities.trimInternal(labelTags[0].textContent);
        var name = Zotero.Utilities.trimInternal(link.textContent);
        if (role === "作者") {
            item.creators.push(Zotero.Utilities.cleanAuthor(name, "author", true));
        } else if (role === "译者") {
            item.creators.push(Zotero.Utilities.cleanAuthor(name, "translator", true));
        }
    }
}

/**
 * Internal helper for scrape: maps each label <span> inside `container`
 * (e.g. "ISBN:") to the text that follows it, up to the next label.
 *
 * The container's text is walked left to right with a cursor, so a label
 * with an empty value cannot shift the values of the labels after it.
 *
 * @returns {Object} trimmed label text -> trimmed value text
 */
function doubanReadDetails(container) {
    var text = container.textContent;
    var labelTags = container.getElementsByTagName("span");
    var values = {};
    var cursor = 0;
    var pendingKey = null;
    for (var i = 0; i < labelTags.length; i++) {
        var label = labelTags[i].textContent;
        var key = doubanTrim(label);
        if (key === "") {
            continue;
        }
        var at = text.indexOf(label, cursor);
        if (at === -1) {
            // Label text already consumed (e.g. a span nested in an earlier one).
            continue;
        }
        if (pendingKey !== null) {
            values[pendingKey] = doubanTrim(text.slice(cursor, at));
        }
        pendingKey = key;
        cursor = at + label.length;
    }
    if (pendingKey !== null) {
        values[pendingKey] = doubanTrim(text.slice(cursor));
    }
    return values;
}

/**
 * Internal helper for scrape: builds the abstract from the "related_info"
 * block, pairing the first heading with the first <div> (book introduction)
 * and the second heading with the second <div> (author introduction).
 *
 * @returns {?string} the abstract text, or null when the block has no <h2>.
 */
function doubanBuildAbstract(relatedInfo) {
    var headings = relatedInfo.getElementsByTagName("h2");
    if (headings.length === 0) {
        return null;
    }
    var sections = relatedInfo.getElementsByTagName("div");

    var bookIntro = "";
    if (sections.length > 0 && headings[0].textContent.indexOf('简介') !== -1) {
        // When the introduction is long the page keeps the text in the
        // second <span>; otherwise use the section's text as is.
        var spans = sections[0].getElementsByTagName("span");
        var source = spans.length > 1 ? spans[1] : sections[0];
        // Runs of whitespace separate paragraphs; turn them into newlines.
        bookIntro = ("简介:\n" + source.textContent + "\n").replace(/\s{2,}/g, "\n");
    }

    var authorIntro = "";
    if (headings.length > 1 && sections.length > 1 &&
            headings[1].textContent.indexOf('作者简介') !== -1) {
        authorIntro = ("作者简介:\n" + sections[1].textContent + "\n").replace(/\s{2,}/g, "\n");
    }
    return bookIntro + authorIntro;
}

/**
 * Scrapes one Douban book page into a Zotero "book" item and saves it.
 *
 * Collected data: title (from the page's <h1>, falling back to doc.title),
 * a link attachment and URL, authors/translators, ISBN, page count,
 * publisher, publication date and an abstract. Every page section is
 * optional: a missing section is skipped rather than aborting the scrape.
 *
 * @param {Document} doc - The book page to scrape.
 */
function scrape(doc) {
    var newItem = new Zotero.Item("book");

    // Title: /html/body/div/h1
    var titleTag = doubanFirstNode(doc, '//html/body/div/h1');
    var rawTitle = titleTag ? titleTag.textContent : doc.title;
    newItem.title = Zotero.Utilities.trimInternal(rawTitle || "");
    Zotero.debug("Title: " + newItem.title);

    // Attachment: a link to the web page (no snapshot), plus the item URL.
    newItem.attachments.push({url:doc.location.href, snapshot:false, title:doc.title, mimeType:"text/html"});
    newItem.url = doc.location.href;

    // Everything else lives under the element with id="info".
    var info = doubanFirstNode(doc, '//*[@id="info"]');
    if (info) {
        doubanAddCreators(info, newItem);

        // ISBN, page count, publisher and date sit in the first <div> of #info.
        var detailBox = info.getElementsByTagName("div")[0];
        if (detailBox) {
            var details = doubanReadDetails(detailBox);
            if (details["ISBN:"]) {
                newItem.ISBN = details["ISBN:"];
            }
            if (details["页数:"]) {
                newItem.pages = details["页数:"];
                // NOTE(review): Zotero's field list names the book page-count
                // field "numPages"; "pages" is kept for backward compatibility.
                newItem.numPages = details["页数:"];
            }
            if (details["出版社:"]) {
                newItem.publisher = details["出版社:"];
            }
            if (details["出版年:"]) {
                newItem.date = details["出版年:"];
            }
        }
    }

    // Abstract: //div[@class="related_info"]
    var relatedInfo = doubanFirstNode(doc, '//div[@class="related_info"]');
    if (relatedInfo) {
        var abstractNote = doubanBuildAbstract(relatedInfo);
        if (abstractNote !== null) {
            newItem.abstractNote = abstractNote;
        }
    }

    newItem.complete();
}
176
/**
 * Translator entry point: saves the book on the current page, or lets the
 * user pick books from a search-result page and saves each chosen one.
 *
 * On a search page ("multiple") the result tables under the element with
 * id="in_tablem" are read; the first link in each table's second cell
 * supplies the book URL and title. Tables without that cell or link are
 * skipped. The selected URLs (or the single page URL) are handed to
 * Zotero.Utilities.processDocuments with scrape as the per-page handler.
 *
 * @param {Document} doc - The loaded page.
 * @param {string} url - The page URL.
 * @returns {(boolean|undefined)} true when there is nothing to process
 *     (no results found or the user cancelled the selection); otherwise
 *     undefined after the asynchronous processing has been started.
 */
function doWeb(doc, url) {
    var urls;

    if (detectWeb(doc, url) === "multiple") {
        Zotero.debug("Enter multiple~");

        // Search results: //*[@id="in_tablem"]
        var container = doc.evaluate('//*[@id="in_tablem"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
        var tables = container ? container.getElementsByTagName("table") : [];

        // Plain object used as a URL -> title map for the selection dialog.
        var items = {};
        var found = false;
        for (var i = 0; i < tables.length; i++) {
            var cells = tables[i].getElementsByTagName("td");
            if (cells.length < 2) {
                continue;
            }
            var anchor = cells[1].getElementsByTagName("a")[0];
            if (!anchor || !anchor.href) {
                continue;
            }
            // trimInternal is used here for consistency with scrape().
            items[anchor.href] = Zotero.Utilities.trimInternal(anchor.textContent);
            found = true;
        }
        if (!found) {
            return true;
        }

        // Let the user choose which entries to save.
        items = Zotero.selectItems(items);
        if (!items) {
            return true;
        }
        Zotero.debug("go on processing.");

        urls = [];
        // A separate loop variable keeps the `url` parameter intact.
        for (var link in items) {
            if (Object.prototype.hasOwnProperty.call(items, link)) {
                urls.push(link);
            }
        }
    } else {
        urls = [url];
    }

    Zotero.debug(urls);
    // Scrape every selected page, then signal completion to Zotero.
    Zotero.Utilities.processDocuments(urls, scrape, function() { Zotero.done(); });
    Zotero.wait();
}


所有评论,共0条:( 我也来说两句)


发表评论

注册登录后再发表评论