2 // @name (dm) Deviant Art Gallery Ripper
3 // @namespace DeviantRipper
4 // @description Click button and generate a list of direct image link urls for all images for a users gallery.
6 // @lastupdated 2013-08-29
7 // @include http://*.deviantart.com/favourites/*
8 // @match http://*.deviantart.com/favourites/*
9 // @include http://*.deviantart.com/gallery/*
10 // @match http://*.deviantart.com/gallery/*
11 // @!nclude http://*.deviantart.com/art/*
12 // @!atch http://*.deviantart.com/art/*
13 // @include http://browse.deviantart.com/*
14 // @match http://browse.deviantart.com/*
15 // @include http://backend.deviantart.com/rss.xml*
16 // @match http://backend.deviantart.com/rss.xml*
// Fallback logger: when the userscript host does not define GM_log,
// define one that forwards its single argument to console.log.
27 if (typeof GM_log
=== 'undefined') {
28 GM_log = function (str
) { console
.log(str
); };
37 * maxreq - maximum number of async requests to do
38 * runcon - number of running connections
39 * interval - interval holder
40 * links - array() object used to help with triggering
41 * new connections for asyncLoad
44 * startInterval - starts the interval loop
45 * accepts args: (heartbeat_function, interval)
46 * heartbeat_function = function called on each trigger pulse
47 * interval = integer of ms between interval triggers
49 * stopInterval() - stops the interval loop
52 * load - synchronous xmlHttpRequest
53 * only handles simple "GET"
54 * accepts args: (string url)
56 * asyncLoad - asynchronous xmlHttpRequest
57 * accepts args: (string url or associative array of options,
58 * function callback, optional args)
59 * url = url string to load
60 * if object can have params like GM_xmlHttpRequest ex:
61 * {url:String, method:String, headers:{associative array},
62 * data:String, onload:Function, onerror:Function,
63 * onreadystatechange:Function}
64 * onload, onerror, onreadystatechange are called with
65 * function(xmlHttpRequest event object, objparams,
66 * callback, extra_args);
68 * See below in asyncLoad definition comment for more
71 * function = callback function called as
72 * callback(xhtr, string url, optional args)
73 * do not specify a callback if using an onload above
75 * optional args = single variable passed verbatim to
76 * the callback function
79 * default heartbeat function
82 * default routine to handle next url fetch
84 * default_callback_xhtr
85 * a default callback to use when retrieving a request
88 var xHttpInstance
= this;
/**
 * Default heartbeat, used by startInterval when no heartbeat function
 * is supplied. Runs once per interval pulse.
 */
this.default_heartbeat = function () {
    var slotAvailable = xHttpInstance.runcon < xHttpInstance.maxreq;
    var workQueued = xHttpInstance.links.length > 0;

    // a connection slot is free and links are waiting: fetch the next one
    if (slotAvailable && workQueued) {
        xHttpInstance.default_next_url();
    }

    // queue is empty and no request is still running: stop pulsing
    if (xHttpInstance.links.length === 0 && xHttpInstance.runcon === 0) {
        xHttpInstance.stopInterval();
    }
};
/**
 * Default "fetch next" routine: takes the oldest entry off the links
 * queue and requests it with asyncLoad, handing the response to
 * default_callback_xhtr. Does nothing when the queue is empty.
 */
this.default_next_url = function () {
    var nextLink;

    if (xHttpInstance.links.length <= 0) {
        return;
    }

    nextLink = xHttpInstance.links.shift().toString();
    xHttpInstance.asyncLoad(nextLink, xHttpInstance.default_callback_xhtr);
};
/**
 * Default response callback: reports the outcome of a request in an
 * alert box.
 *
 * xhtr   - the XMLHttpRequest object of the finished request
 * strURL - the url that was requested
 * args   - optional extra argument (not used here)
 */
this.default_callback_xhtr = function (xhtr, strURL, args) {
    var message;

    switch (xhtr.status) {
    case 404:
        // page not found
        message = "Page wasn't found at: " + strURL + "\n" +
            xhtr.status + " " + xhtr.statusText;
        break;
    case 200:
        // success: show the body that came back
        message = xhtr.responseText;
        break;
    default:
        // every other status code
        message = "Site returned: " + xhtr.status + " '" +
            xhtr.statusText + "' for: \n" + strURL;
        break;
    }

    alert(message);
};
138 * startInterval (heartbeat_function, heartbeat_pulse)
140 * heartbeat_function: function reference to call on each heartbeat
142 * heartbeat_pulse: integer
// startInterval: (re)starts the heartbeat timer. Any interval already held in
// xHttpInstance.interval is stopped first; the id returned by
// setInterval(heartbeat_func, pulse_rate) is then stored in xHttpInstance.interval.
145 this.startInterval = function (heartbeat_function
, heartbeat_pulse
) {
148 // check for and stop existing interval
149 if (xHttpInstance
.interval
!== null) { xHttpInstance
.stopInterval(); }
// A supplied heartbeat_pulse is used as the pulse rate and rejected (string throw)
// when isNaN() reports it is not a number.
151 if (typeof heartbeat_pulse
=== 'undefined') {
154 pulse_rate
= heartbeat_pulse
;
155 if (isNaN(pulse_rate
)) { // validate its an actual number
156 throw "startInterval given invalid pulse rate :" +
// With no heartbeat_function supplied, xHttpInstance.default_heartbeat is used.
160 if (typeof heartbeat_function
=== 'undefined') {
161 heartbeat_func
= xHttpInstance
.default_heartbeat
;
163 heartbeat_func
= heartbeat_function
;
// NOTE(review): `!heartbeat_func instanceof Function` parses as
// `(!heartbeat_func) instanceof Function`. The left side is always a boolean,
// which is never an instance of Function, so this guard can never throw.
// `typeof heartbeat_func !== 'function'` expresses the intended check.
166 if (!heartbeat_func
instanceof Function
) {
167 throw "startInterval given incorrect heartbeat function argument.";
169 /* end error checking */
170 xHttpInstance
.interval
= setInterval(heartbeat_func
, pulse_rate
);
176 * stops the xHttp interval loop.
/**
 * Cancels the running heartbeat timer and resets the interval holder to
 * null so startInterval can tell that no loop is active.
 */
this.stopInterval = function () {
    var timerId = xHttpInstance.interval;

    clearInterval(timerId);
    xHttpInstance.interval = null;
};
186 * synchronous XMLHttpRequest load with simple parameter request.
187 * Returns text value of get request or false.
// load: blocking GET of strURL. The third argument to open() is false,
// which makes the XMLHttpRequest synchronous.
189 this.load = function (strURL
) {
190 //if (debug) { GM_log("Getting url: " + strURL); }
191 var xhtr
= new XMLHttpRequest();
192 xhtr
.open("GET", strURL
, false);
// Only a completed request (readyState 4) answered with HTTP 200
// returns the response text.
194 if (xhtr
.readyState
=== 4 && xhtr
.status
=== 200) {
195 return xhtr
.responseText
;
201 * asyncLoad(objparams, callback, extra_args)
203 * asynchronous url fetching routine (several requests may be in flight at once)
204 * gets url contents and sends to callback function
206 * if objparams is passed as a string function assumes
207 * simple get request with objparams being a url string.
210 * objparams object properties imitates grease monkey
211 * GM_xmlHttpRequest function.
213 * method - a string, the HTTP method to use on this request.
214 * Generally GET, but can be any HTTP verb, including POST,
217 * url - a string, the URL to use on this request. Required.
219 * headers - an associative array of HTTP headers to include on
220 * this request. Optional, defaults to an empty array. Example:
221 * headers: {'User-Agent': 'Mozilla/4.0 (compatible) Greasemonkey',
222 * 'Accept': 'application/atom+xml,application/xml,text/xml'}
224 * data - a string, the body of the HTTP request. Optional, defaults
225 * to an empty string. If you are simulating posting a form
226 * (method == 'POST'), you must include a Content-type of
227 * 'application/x-www-form-urlencoded' in the headers field,
228 * and include the URL-encoded form data in the data field.
230 * onreadystatechange - a function object, the callback function to be
231 * called repeatedly while the request is in progress.
233 * onerror - a function object, the callback function to be called
234 * if an error occurs while processing the request.
236 * onload - a function object, the callback function to be called when
237 * the request has finished successfully.
238 * ** DO NOT ** specify a callback function if using onload.
239 * onload will take precedence and fire instead of callback.
240 * onload will pass the callback value to its called function
241 * if you want to use the values in some way. See definition
242 * below for the default_onload.
245 this.asyncLoad = function (objparams
, callback
, extra_args
) {
246 //if (debug) GM_log("Async Getting url : " + url);
248 // local function Variables
251 var default_method
= "GET";
253 var http_req
= new XMLHttpRequest();
254 var xHttpPtr
= xHttpInstance
;
256 var useGMxml
= false;
258 var onerror_wrapper
= null;
259 var onload_wrapper
= null;
260 var onreadystatechange_wrapper
= null;
261 // end local function variables
263 var default_onerror = function (args
) {
265 * do something here when there's errors.
269 target
= args
.target
;
273 xHttpInstance
.runcon
-= 1;
274 if (onerror_wrapper
!== null) {
275 onerror_wrapper(target
, objparams
, callback
, extra_args
);
279 var default_onreadystatechange = function (args
) {
282 target
= args
.target
;
286 if (onreadystatechange_wrapper
!== null) {
287 onreadystatechange_wrapper(target
, objparams
,
288 callback
, extra_args
);
292 var default_onload = function (args
) {
294 // GM_log("xmlHttpRequest response: " +
295 // args.readyState + " " + args.status + " " +
300 target
= args
.target
;
304 xHttpPtr
.runcon
-= 1;
305 if (onload_wrapper
!== null) {
306 onload_wrapper(target
, objparams
, callback
, extra_args
);
308 callback(target
, url
, extra_args
);
312 if (typeof objparams
!== 'object') {
313 if (typeof objparams
=== 'string') {
315 method
= default_method
;
316 http_req
.open(method
, url
, true);
318 throw "asyncLoad error: parameters not object or string";
322 // check url parameter value
323 if (typeof objparams
['url'] !== 'string') {
324 throw "asyncLoad error: missing url parameter.";
326 // make sure its not blank
327 url
= objparams
['url'];
329 throw "asyncLoad error: url parameter is empty string.";
333 // check if we specified method
334 if (typeof objparams
['method'] === 'string') {
335 method
= objparams
['method'];
337 method
= default_method
;
340 // open xmlHttpRequest so we can properly set headers
341 http_req
.open(method
, url
, true);
343 // check if we specified any custom headers and have some sort
344 // of validation of the data. Just ignores non strings.
345 if (typeof objparams
['headers'] === 'object') {
346 for (headkey
in objparams
['headers']) {
347 if (objparams
['headers'].hasOwnProperty(headkey
)) {
348 if (typeof headkey
=== 'string') {
349 if (typeof objparams
['headers'][headkey
]
351 http_req
.setRequestHeader(headkey
,
352 objparams
['headers'][headkey
]);
359 if (typeof objparams
['data'] === 'string') {
360 send_data
= objparams
['data'];
363 if (typeof objparams
['onreadystatechange'] === 'function') {
364 onreadystatechange_wrapper
= objparams
['onreadystatechange'];
367 if (typeof objparams
['onerror'] === 'function') {
368 onerror_wrapper
= objparams
['onerror'];
371 if (typeof objparams
['onload'] === 'function') {
372 onload_wrapper
= objparams
['onload'];
375 if (objparams
['useGMxml']) {
381 if (typeof callback
!== 'function' &&
382 typeof onload_wrapper
!== 'function') {
383 throw "asyncLoad error: no callback or onload function passed.";
386 xHttpPtr
.runcon
+= 1;
392 headers: objparams
['headers'],
393 onload: default_onload
,
394 onerror: default_onerror
,
395 onreadystatechange: default_onreadystatechange
399 http_req
.onerror
= default_onerror
;
400 http_req
.onreadystatechange
= default_onreadystatechange
;
401 http_req
.onload
= default_onload
;
403 http_req
.send(send_data
);
408 var deviantRipper
= {
409 isChrome : /chrome/i.test(navigator
.userAgent
),
410 isFireFox : /firefox/i.test(navigator
.userAgent
),
412 useGMxml : false, // flag to use GM_xmlHttpRequest instead of XMLHttpRequest
413 xml_link_data : [], // array holder for xml page links
415 //recurse flag used for thumbnail pages mainly. if set to false and button
416 //clicked on single page it doesn't really do anything useful.
417 recurse: true, // recurse into lower gallery pages
418 current: 0, // current counter reused for image and gallery parsing
419 total: 0, // total counter used for image parsing
420 urls: [], // holder for url html list
421 toparse: [], // list of urls of single image pages that need to be parsed for DDL
422 textbox: null, // textbox holder
423 fetchStatus: 0 // status id for script checking status:
424 // 0 = not started, 1 = getting indexes
425 // 2 = getting image DDL, 3 = finished everything
426 // 4 = displayed urls (finished or aborted)
432 * function called when we're all done and we want to
433 * display the list of url's we got.
435 display_url_list : function () {
436 var docNamespace
= 'http://www.w3.org/1999/xhtml';
439 if (debug
) { GM_log("Call: display_url_list()"); }
440 if (debug
) { GM_log(deviantRipper
); }
441 if (deviantRipper
.pages
.fetchStatus
> 3) { return; }
442 deviantRipper
.pages
.textbox
=
443 document
.createElementNS(docNamespace
, "textarea");
444 deviantRipper
.pages
.textbox
.style
.width
= '100%';
445 for (counter
= 0; counter
< deviantRipper
.pages
.urls
.length
; counter
+= 1) {
446 if (debug
) { GM_log("Fixing " + deviantRipper
.pages
.urls
[counter
]); }
447 if (deviantRipper
.pages
.urls
[counter
].indexOf('http://th') > -1) {
448 tmpStr
= deviantRipper
.pages
.urls
[counter
].replace('http://th', 'http://fc').replace('/PRE/', '/');
449 deviantRipper
.pages
.urls
[counter
] = tmpStr
;
452 deviantRipper
.pages
.textbox
.innerHTML
=
453 deviantRipper
.pages
.urls
.join('\r\n');
454 document
.body
.insertBefore(deviantRipper
.pages
.textbox
,
455 document
.body
.firstChild
);
456 deviantRipper
.pages
.fetchStatus
= 4;
462 * Called as first function execution upon script load.
463 * Sets up the xmlHttpRequest helpers and generates click button.
466 // Check whether we're on backend
467 deviantRipper
.xml_xHttp
= new XHttp();
469 if (debug
) { GM_log("init() isChrome: " + deviantRipper
.isChrome
+ " isFireFox: " + deviantRipper
.isFireFox
); }
470 if (deviantRipper
.isFireFox
=== true) {
471 deviantRipper
.useGMxml
= true;
474 if (/backend/i.test(location
.hostname
) === true) {
475 if (/rss\.xml/i.test(location
.href
) === true) {
476 // test if we're in iframe if not then get out
477 if (window
=== parent
) { return; }
479 deviantRipper
.pages
.btnID
= deviantRipper
.btn
.generateXMLButton();
480 deviantRipper
.btn
.startXML(document
.location
.href
);
483 deviantRipper
.pages
.btnID
= deviantRipper
.btn
.generateButton();
489 * isThumbnailGallery (doc)
491 * return true if page seems to be a gallery index
492 * or false if it looks like its a single image page
493 * detection is looking for the comments by the artist
494 * usually found on the single image page
496 isThumbnailGallery : function (doc
) {
497 if (debug
) { GM_log("Call: isThumbnailGallery()"); }
498 return (doc
.getElementById("artist-comments")) ? false : true;
504 * check if we clicked the button to abort script
505 * if we did it requires a page reload to start again
508 isAborted : function () {
509 if (debug
) { GM_log("isAborted(): " + deviantRipper
.abort_links
); }
510 if (deviantRipper
.abort_links
=== true) {
511 deviantRipper
.pages
.btnID
.value
= 'Aborted: ' + deviantRipper
.pages
.btnID
.value
;
512 if (debug
) { GM_log("FetchStatus: " + deviantRipper
.pages
.fetchStatus
); }
513 if (deviantRipper
.pages
.fetchStatus
> 1) { deviantRipper
.display_url_list(); }
514 deviantRipper
.xml_link_data
= [];
515 deviantRipper
.pages
.toparse
= [];
525 * get our next gallery page from our stack,
526 * increment our fetching counter, and fetch page
528 next_xml : function () {
531 if (debug
) { GM_log("Call: next_xml()"); }
532 if (deviantRipper
.checker
.isAborted()) {
535 if (deviantRipper
.xml_link_data
.length
> 0) {
536 link_uri
= deviantRipper
.xml_link_data
.shift().toString();
537 if (debug
) { GM_log("Shifted: " + link_uri
+ "\ntypeof: " + typeof link_uri
); }
539 if (deviantRipper
.useGMxml
) {
541 GM_log("Using GreaseMonkey GM_xmlHttpRequest.");
543 deviantRipper
.xml_xHttp
.asyncLoad({
546 onload: deviantRipper
.callback
.scan_xml_dom
549 deviantRipper
.xml_xHttp
.asyncLoad(link_uri
, deviantRipper
.callback
.scan_xml_dom
);
558 * image_links_xml (docbase)
560 * function called after we load a gallery index page,
561 * "docbase" references the document of the index page
562 * so we can start looking for thumbnails in order to
563 * get the single image page links.
565 image_links_xml : function (docbase
) {
566 if (debug
) { GM_log("Call: image_links_xml()"); }
576 items
= docbase
.getElementsByTagNameNS('*', 'item');
577 if (items
.length
< 1) {
578 deviantRipper
.pages
.recurse
= false;
582 for (counter
= 0; counter
< items
.length
; counter
+= 1) {
583 content
= items
[counter
].getElementsByTagNameNS('*', 'content');
584 thumbnails
= items
[counter
].getElementsByTagNameNS('*', 'thumbnail');
587 if (thumbnails
.length
> 0) {
588 // grab last thumbnail item and use it incase we don't find any content lines
589 thumbnail
= thumbnails
[thumbnails
.length
- 1].getAttribute('url');
592 for (locounter
= 0; locounter
< content
.length
; locounter
+= 1) {
593 if (content
[locounter
].getAttribute('medium') === 'image') { lofi
= content
[locounter
].getAttribute('url'); }
594 if (content
[locounter
].getAttribute('medium') === 'document') { hifi
= content
[locounter
].getAttribute('url'); }
597 if (debug
) { GM_log("Hifi: " + hifi
); }
598 deviantRipper
.pages
.urls
.push(hifi
);
599 } else if (lofi
!== null) {
600 if (debug
) { GM_log("Lofi: " + lofi
); }
601 deviantRipper
.pages
.urls
.push(lofi
);
603 if (debug
) { GM_log("thumbnail: " + thumbnail
); }
604 deviantRipper
.pages
.urls
.push(thumbnail
);
607 if (debug
) { GM_log([counter
, length
, deviantRipper
.pages
.urls
.length
]); }
612 * next_xml_page_link (docbase)
614 * Function called after loading xml page looking for next
616 next_xml_page_link : function (docbase
) {
617 if (debug
) { GM_log("Call: next_xml_page_link()"); }
618 if (debug
) { GM_log(docbase
); }
621 // var counter, length;
622 // links = docbase.getElementsByTagNameNS('http://www.w3.org/2005/Atom', 'link');
623 // for (counter = 0, length = links.length;
626 // if (links[counter].getAttribute('rel').toString() === "next") {
627 // rtn_val = links[counter];
631 rtn_val
= docbase
.querySelector('link[rel="next"]');
633 rtn_val
= rtn_val
.getAttribute('href');
634 if (debug
) { GM_log("NextXML page: " + rtn_val
); }
647 * scan_xml_dom (HTML_Data, url, args)
649 * called when gallery page html is loaded
650 * so we can parse images out and set next page
652 scan_xml_dom : function (HTML_Data
, url
, args
) {
653 if (debug
) { GM_log("Call: scan_xml_dom()"); }
658 html_dom
= HTML_Data
.responseXML
;
660 if (HTML_Data
.responseText
!== "") {
661 parser
= new DOMParser();
662 html_dom
= parser
.parseFromString(HTML_Data
.responseText
, "text/xml");
664 throw "There was an error parsing XML from: " + url
;
668 // parse and add images on page to fetch stack
669 deviantRipper
.parser
.image_links_xml(html_dom
);
671 deviantRipper
.pages
.current
+= 1;
672 deviantRipper
.pages
.btnID
.value
= "Loading xml page " +
673 deviantRipper
.pages
.current
+
674 "(" + deviantRipper
.pages
.urls
.length
+ ")";
676 if (deviantRipper
.pages
.recurse
) {
677 nextPage
= deviantRipper
.parser
.next_xml_page_link(html_dom
);
678 if (nextPage
) { deviantRipper
.xml_link_data
.push(nextPage
.toString()); }
689 * onclick function triggered when the
690 * button we injected is clicked to get
693 getLinks : function () {
694 if (debug
) { GM_log("Call: getLinks()"); }
697 var docNamespace
= 'http://www.w3.org/1999/xhtml';
699 deviantRipper
.pages
.btnID
.removeEventListener("click", deviantRipper
.btn
.getLinks
, false);
700 feedbutton
= document
.querySelector('link[type="application/rss+xml"]');
702 throw "No feed button on this page.";
705 if (deviantRipper
.isChrome
=== true) {
706 deviantRipper
.pages
.btnID
.parentNode
.removeChild(
707 deviantRipper
.pages
.btnID
710 iframeLoader
= document
.createElementNS(
714 iframeLoader
.src
= feedbutton
.href
;
715 iframeLoader
.style
.width
= '100%';
716 iframeLoader
.style
.height
= '100px';
717 document
.body
.insertBefore(
719 document
.body
.firstChild
722 deviantRipper
.btn
.startXML(feedbutton
.href
);
730 * started from init() to start grabbing XML pages
731 * starting with current loaded one. Script assumes
732 * we loaded from an iframe.
734 startXML : function (galleryLink
) {
735 if (debug
) { GM_log("Call: startXML(" + arguments
[0] + ")"); }
736 deviantRipper
.pages
.btnID
.addEventListener('click', deviantRipper
.btn
.abortLinkChecking
, false);
737 deviantRipper
.xml_link_data
.push(galleryLink
.toString());
738 deviantRipper
.pages
.fetchStatus
= 1;
739 deviantRipper
.xml_xHttp
.startInterval(deviantRipper
.heartbeat
.load_xml
, 50);
743 * abortLinkChecking ()
745 * onclick triggered when button is clicked
746 * while we're getting links.
748 abortLinkChecking : function () {
749 deviantRipper
.abort_links
= true;
750 GM_log("abortLinkChecking()");
751 deviantRipper
.pages
.btnID
.removeEventListener('click', deviantRipper
.abortLinkChecking
, false);
757 * creates the click button for our page
759 generateButton : function () {
760 if (debug
) { GM_log("Call: generateButton()"); }
764 new_button
= document
.createElement("input");
765 new_button
.type
= "button";
766 new_button
.value
= "Get URLs for Gallery";
767 new_button
.setAttribute("onsubmit", "return false;");
769 // var btnLoc = document.getElementById("gmi-GalleryEditor");
770 btnLoc
= document
.getElementById("output");
772 btnLoc
.insertBefore(new_button
, btnLoc
.firstChild
);
773 new_button
.addEventListener("click", deviantRipper
.btn
.getLinks
, false);
775 new_button
.value
= "Root Thumbnail Page?";
776 document
.body
.insertBefore(new_button
, document
.body
.firstChild
);
782 * generateXMLButton()
784 * creates the click button for our page
786 generateXMLButton : function () {
787 if (debug
) { GM_log("Call: generateXMLButton()"); }
789 var docNamespace
= 'http://www.w3.org/1999/xhtml';
790 var replacedRootNode
= document
.createElement('clearinghouse');
792 // empty out the current document view.
793 if (deviantRipper
.isChrome
=== true) {
794 while (document
.documentElement
.firstChild
) {
795 replacedRootNode
.appendChild(
796 document
.documentElement
.firstChild
799 } else if (deviantRipper
.isFireFox
=== true) {
800 while (document
.body
.firstChild
) {
801 replacedRootNode
.appendChild(
802 document
.body
.firstChild
807 if (document
.body
=== null) {
808 document
.body
= document
.createElementNS('http://www.w3.org/1999/xhtml', 'body');
809 document
.documentElement
.appendChild(document
.body
);
812 new_button
= document
.createElementNS(docNamespace
, 'input');
813 new_button
.type
= "button";
814 new_button
.value
= "Loading...";
815 new_button
.setAttribute("onsubmit", "return false;");
816 document
.body
.appendChild(new_button
);
817 new_button
.addEventListener('click', deviantRipper
.btn
.abortLinkChecking
, false);
828 * heartbeat loop while loading gallery indices
830 load_xml : function () {
831 var runcon
= deviantRipper
.xml_xHttp
.runcon
;
832 var maxreq
= deviantRipper
.xml_xHttp
.maxreq
;
833 var length
= deviantRipper
.xml_link_data
.length
;
834 if ((runcon
< maxreq
) && (length
> 0)) {
835 if (debug
) { GM_log("heartbeat load_xml()\nrunning connections: (" + runcon
+ ') max running (' + maxreq
+ ')'); }
836 deviantRipper
.checker
.next_xml();
838 if ((length
=== 0) && (runcon
=== 0)) {
839 if (debug
) { GM_log("Stopping heartbeat out of xml pages to pull."); }
840 deviantRipper
.xml_xHttp
.stopInterval();
842 deviantRipper
.pages
.total
= deviantRipper
.pages
.toparse
.length
;
843 deviantRipper
.pages
.fetchStatus
= 3;
844 deviantRipper
.xml_xHttp
.startInterval(deviantRipper
.heartbeat
.xml_finisher
, 50);
851 * watches for xml to finish loading then displays the urls.
853 xml_finisher : function () {
854 var runcon
= deviantRipper
.xml_xHttp
.runcon
;
855 var length
= deviantRipper
.xml_link_data
.length
;
856 if ((length
=== 0) && (runcon
=== 0)) {
857 if (debug
) { GM_log("Stopping heartbeat xml_finisher."); }
858 deviantRipper
.xml_xHttp
.stopInterval();
860 deviantRipper
.display_url_list();
869 if (debug
) { GM_log("Current URL loaded from: " + document
.location
.href
); }
870 //start the dirty stuff
871 deviantRipper
.init();