pavuk man page on DragonFly

Man page or keyword search:  
man Server   44335 pages
apropos Keyword Search (all sections)
Output format
DragonFly logo
[printable version]

pavuk(1)			Internet utils			      pavuk(1)

NAME
       pavuk  -	 HTTP,	HTTP  over SSL, FTP, FTP over SSL and Gopher recursive
       document retrieval program

SYNOPSIS
       pavuk [-mode {normal | resumeregets | singlepage | singlereget | sync |
       dontstore | ftpdir | mirror}] [-X] [-runX] [-bg/-nobg] [-prefs/-noprefs]
       [-h]  [-v]  [-progress/-noprogress]  [-stime/-nostime]  [-xmaxlog  $nr]
       [-logfile  $file]  [-slogfile  $file] [-auth_file $file] [-msgcat $dir]
       [-language      $str]	  [-gui_font	  $font]      [-quiet/-verbose]
       [-read_css/-noread_css]	[-cdir	$dir]  [-scndir $dir] [-scenario $str]
       [-dumpscn $filename] [-lmax $nr] [-dmax $nr] [-leave_level $nr]	[-max‐
       size $nr] [-minsize $nr] [-asite $list] [-dsite $list] [-adomain $list]
       [-ddomain $list] [-asfx $list] [-dsfx $list] [-aprefix $list] [-dprefix
       $list] [-amimet $list] [-dmimet $list] [-pattern $pattern] [-url_pattern
       $pattern] [-rpattern $regexp]  [-url_rpattern  $regexp]	[-skip_pattern
       $pattern]   [-skip_url_pattern	$pattern]   [-skip_rpattern   $regexp]
       [-skip_url_rpattern $regexp] [-newer_than  $time]  [-older_than	$time]
       [-schedule   $time]  [-reschedule  $nr]	[-dont_leave_site/-leave_site]
       [-dont_leave_dir/-leave_dir]  [-http_proxy  $site[:$port]]  [-ftp_proxy
       $site[:$port]] [-ssl_proxy $site[:$port]] [-gopher_proxy $site[:$port]]
       [-ftp_httpgw/-noftp_httpgw]	   [-ftp_dirtyproxy/-noftp_dirtyproxy]
       [-gopher_httpgw/-nogopher_httpgw]     [-noFTP/-FTP]     [-noHTTP/-HTTP]
       [-noSSL/-SSL]  [-noGopher/-Gopher]  [-FTPdir/-noFTPdir]	 [-noCGI/-CGI]
       [-FTPlist/-noFTPlist]   [-FTPhtml/-noFTPhtml]   [-noRelocate/-Relocate]
       [-force_reget/-noforce_reget]			     [-nocache/-cache]
       [-check_size/-nocheck_size]	[-noRobots/-Robots]	 [-noEnc/-Enc]
       [-auth_name     $user]	  [-auth_passwd	     $pass]	 [-auth_scheme
       1/2/3/4/user/Basic/Digest/NTLM]	       [-auth_reuse_nonce/-no_auth_re‐
       use_nonce]   [-http_proxy_user	 $user]	   [-http_proxy_pass	$pass]
       [-http_proxy_auth       1/2/3/4/user/Basic/Digest/NTLM]	    [-auth_re‐
       use_proxy_nonce/-no_auth_reuse_proxy_nonce]    [-ssl_key_file	$file]
       [-ssl_cert_file	 $file]	  [-ssl_cert_passwd   $pass]   [-from  $email]
       [-send_from/-nosend_from] [-identity $str]  [-auto_referer/-noauto_ref‐
       erer]  [-referer/-noreferer]  [-alang  $list] [-acharset $list] [-retry
       $nr] [-nregets $nr] [-nredirs $nr] [-rollback $nr] [-sleep $nr] [-time‐
       out   $nr]   [-preserve_time/-nopreserve_time]  [-preserve_perm/-nopre‐
       serve_perm]   [-preserve_slinks/-nopreserve_slinks]   [-bufsize	  $nr]
       [-maxrate  $nr]	[-minrate  $nr]	 [-user_condition  $str] [-cookie_file
       $file]	[-cookie_send/-nocookie_send]	 [-cookie_recv/-nocookie_recv]
       [-cookie_update/-nocookie_update]     [-cookies_max     $nr]	[-dis‐
       abled_cookie_domains  $list]  [-disable_html_tag	 $TAG,[$ATTRIB][;...]]
       [-enable_html_tag     $TAG,[$ATTRIB][;...]]	[-tr_del_chr	 $str]
       [-tr_str_str $str1 $str2] [-tr_chr_chr $chrset1 $chrset2]  [-index_name
       $str]  [-store_index/-nostore_index]  [-store_name $str] [-debug/-node‐
       bug]  [-debug_level  $level]   [-browser	  $str]	  [-urls_file	$file]
       [-file_quota  $nr] [-trans_quota $nr] [-fs_quota $nr] [-enable_js/-dis‐
       able_js]	   [-fnrules	$t    $m    $r]	   [-store_info/-nostore_info]
       [-all_to_local/-noall_to_local]	       [-sel_to_local/-nosel_to_local]
       [-all_to_remote/-noall_to_remote]      [-url_strategie	   $strategie]
       [-remove_adv/-noremove_adv]   [-adv_re	$RE]   [-check_bg/-nocheck_bg]
       [-send_if_range/-nosend_if_range]	   [-sched_cmd		 $str]
       [-unique_log/-nounique_log]    [-post_cmd   $str]   [-ssl_version   $v]
       [-unique_sslid/-nounique_sslid] [-aip_pattern $re]  [-dip_pattern  $re]
       [-use_http11/-nouse_http11]  [-local_ip	$addr] [-request $req] [-form‐
       data $req] [-httpad $str] [-nthreads $nr] [-immesg/-noimmesg]  [-dumpfd
       $nr]	   [-dump_urlfd	      $nr]	 [-unique_name/-nounique_name]
       [-leave_site_enter_dir/-dont_leave_site_enter_dir]   [-max_time	  $nr]
       [-del_after/-nodel_after]		   [-singlepage/-nosinglepage]
       [-dump_after/-nodump_after]	     [-dump_response/-nodump_response]
       [-auth_ntlm_domain  $str]  [-auth_proxy_ntlm_domain  $str] [-js_pattern
       $re]   [-follow_cmd    $str]    [-retrieve_symlink/-noretrieve_symlink]
       [-js_transform	$p   $t	  $h   $a]   [-js_transform2   $p  $t  $h  $a]
       [-ftp_proxy_user		$str]	       [-ftp_proxy_pass		 $str]
       [-limit_inlines/-dont_limit_inlines]	 [-ftp_list_options	 $str]
       [-fix_wuftpd_list/-nofix_wuftpd_list]	 [-post_update/-nopost_update]
       [-info_dir  $dir]  [-mozcache_dir  $dir]	 [-aport $list] [-dport $list]
       [-hack_add_index/-nohack_add_index]	 [-default_prefix	 $str]
       [-rsleep/-norsleep]     [-ftp_login_handshake	 $host	   $handshake]
       [-js_script_file	     $file]	  [-dont_touch_url_pattern	 $pat]
       [-dont_touch_url_rpattern    $pat]    [-dont_touch_tag_rpattern	 $pat]
       [-tag_pattern $tag $attrib  $url]  [-tag_rpattern  $tag	$attrib	 $url]
       [-nss_cert_dir							 $dir]
       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]	  [-nss_domes‐
       tic_policy/-nss_export_policy] [-[no]verify] [-tlogfile $file] [-trela‐
       tive {object | program}] [-transparent_proxy  FQDN[:port]]  [-transpar‐
       ent_ssl_proxy FQDN[:port]] [-sdemo] [-noencode] [URLs]

       pavuk -mode {normal | singlepage | singlereget} [-base_level $nr]

       pavuk  -mode  sync  [-ddays  $nr]  [-subdir  $dir]  [-remove_old/-nore‐
       move_old]

       pavuk -mode resumeregets [-subdir $dir]

       pavuk -mode linkupdate [-X]  [-h]  [-v]	[-cdir	$dir]  [-subdir	 $dir]
       [-scndir $dir] [-scenario $str]

       pavuk -mode reminder [-remind_cmd $str]

       pavuk   -mode   mirror	[-subdir   $dir]   [-remove_old/-noremove_old]
       [-remove_before_store/-noremove_before_store]	  [-always_mdtm/-noal‐
       ways_mdtm]

DESCRIPTION
       This  manual page describes how to use pavuk. Pavuk can be used to mir‐
       ror contents of internet/intranet servers and to maintain copies	 in  a
       local  tree  of documents.  Pavuk stores retrieved documents in locally
       mapped disk space. The structure of the local tree is the same  as  the
       one on the remote server. Each supported service (protocol) has its own
       subdirectory in the local tree.	Each referenced	 server	 has  its  own
       subdirectory in these protocol subdirectories, followed by the port
       number on which the service resides, delimited by the _ character (this
       can be changed).  With the option -fnrules you can change the default layout of
       the local document tree, without losing link consistency.
       With pavuk it is possible to have up-to-date copies of remote documents
       in the local disk space.
       As  of  version	0.3pl2, pavuk can automatically restart broken connec‐
       tions, and reget partial content from an FTP server (which must support
       the  REST command), from a properly configured HTTP/1.1 server, or from
       a HTTP/1.0 server which supports Ranges.
       As of version 0.6 it is possible to handle configurations via so called
       scenarios.   The best way to create such a configuration file is to use
       the X Window interface and simply save the created  configuration.  The
       other way is to use the -dumpscn switch.
       As  of version 0.7pl1 it is possible to store authentification informa‐
       tion into an authinfo file, which pavuk can then parse and use.
       As of version 0.8pl4 pavuk can fetch  documents	for  use  in  a	 local
       proxy/cache server without storing them to local documents tree.
       As of version 0.9pl4 pavuk supports SOCKS (4/5) proxies if you have the
       required libraries.
       As of version 0.9pl12 pavuk can preserve permissions  of	 remote	 files
       and symbolic links, so it can be used for powerful FTP mirroring.
       Pavuk  supports	SSL connections to FTP servers, if you specify ftps://
       URL instead of ftp://.
       Pavuk can automatically handle file names with  unsafe  characters  for
       filesystem.  This is currently implemented only for the Win32 platform and it is
       hard coded.
       Pavuk can  now  use  HTTP/1.1  protocol	for  communication  with  HTTP
       servers.	  It  can  use	persistent  connections, so one TCP connection
       should be used to transfer several documents without closing  it.  This
       feature saves network bandwidth and also speeds up network communication.
       Pavuk  can  do  configurable  POST requests to HTTP servers and support
       also file uploading via HTTP POST request.
       Pavuk can automatically fill in HTML forms it finds, if the user supplies
       data for their fields beforehand with the -formdata option.
       Pavuk  can  run configurable number of concurrently running downloading
       threads when compiled with multithreading support.

Format of supported URLs
       HTTP
       http://[[user][:password]@]host[:port][/document]
       [[user][:password]@]host[:port][/document]

       HTTPS
       https://[[user][:password]@]host[:port][/document]
       ssl[.domain][:port][/document]

       FTP
       ftp://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftp://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftp[.domain][:port][/document][;type=x]

       FTPS
       ftps://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftps://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftps[.domain][:port][/document][;type=x]

       Gopher
       gopher://host[:port][/type[document]]
       gopher[.domain][:port][/type[document]]

Default mapping of URLs to local filenames
       HTTP
       http://[[user][:password]@]host[:port][/document][?query]
       to
       http/host_port/[document][?query]

       HTTPS
       https://[[user][:password]@]host[:port][/document][?query]
       to
       https/host_port/[document][?query]

       FTP
       ftp://[[user][:password]@]host[:port][/path]
       to
       ftp/host_port/[path]

       FTPS
       ftps://[[user][:password]@]host[:port][/path]
       to
       ftps/host_port/[path]

       Gopher
       gopher://host[:port][/type[document]]
       to
       gopher/host_port/[type[document]]

       NOTE: Pavuk will use the string with which it queries the target server
       as  the	name  of  the results file. This file name may, in some cases,
       contain punctuations such as $,?,=,& etc. Such  punctuation  can	 cause
       problems	 when  you  are	 trying	 to  browse downloaded files with your
       browser or you are  trying  to  process	downloaded  files  with	 shell
       scripts	or  view  files with file management utilities which reference
       the name of the results file.  If you believe that this may be causing
       problems	 for  you, then you can remove all punctuation from the result
       file name with the option: -tr_del_chr [:punct:] or with other  options
       for adjusting filenames.

OPTIONS
	All options are case insensitive.

List of options chapters
       Mode
       Help
       Indicate/Logging/Interface options
       Netli options
       Special start
       Scenario/Task options
       Directory options
       Preserve options
       Proxy options
       Proxy Authentification
       Protocol/Download Options
       Authentification
       Site/Domain/Port Limitation Options
       Limitation Document properties
       Limitation Document name
       Limitation Protocol Option
       Other Limitation Options
       Javascript support
       Cookie
       HTML rewriting engine tuning options
       Filename/URL Conversion Option
       Other Options

Mode
       -mode {normal, linkupdate, sync, singlepage, singlereget, resumeregets}
	      Set operation mode.
	      normal - retrieves recursive documents
	      linkupdate - update remote URLs in local HTML documents to local
	      URLs if these URLs exist in the local tree
	      sync - synchronize remote documents with local tree (if a	 local
	      copy  of	a  document  is	 older	than  remote,  the document is
	      retrieved again, otherwise nothing happens)
	      singlepage - URL is  retrieved  as  one  page  with  all	inline
	      objects  (picture,  sound	 ...)	this  mode is now obsoleted by
	      -singlepage option.
	      resumeregets - pavuk scans the local tree for  files  that  were
	      not  retrieved  fully and retrieves them again (uses partial get
	      if possible)
	      singlereget - get URL until it is retrieved in full
	      dontstore - transfer page from server, but don't store it to the
	      local  tree.   This mode is suitable for fetching pages that are
	      held in a local proxy/cache server.
	      reminder - used to inform the user about changed documents
	      ftpdir - used to list contents of FTP directories

	      default operation mode is normal mode.

Help
       -h     Print long verbose help message

       -v     Show version information and configuration at compilation time.

Indicate/Logging/Interface options
       -quiet Don't show any messages on the screen.

       -verbose
	      Force to show output messages on the screen (default)

       -progress/-noprogress
	      Show retrieving progress while running in the terminal  (default
	      is progress off)

       -stime/-nostime
	      Show start and end time of transfer. (by default this
	      information is not shown)

       -xmaxlog $nr
	      Maximum number of log lines in the Log widget.  0	 means	unlim‐
	      ited.  This option is available only when compiled with the GTK+
	      GUI. (default value is 0)

       -logfile $file
	      File where all produced messages are stored.

       -unique_log/-nounique_log
	      When logfile as specified with the option	 -logfile  is  already
	      used by another process, try to generate new unique name for the
	      log file. (default is this option turned off)

       -slogfile $file
	      File to store short logs in. This	 file  contains	 one  line  of
	      information per processed document.  This is meant to be used
	      in connection with any sort of script to	produce	 some  statis‐
	      tics,  for  validating  links on your website, or for generating
	      simple sitemaps.	Multiple pavuk processes  can  use  this  file
	      concurrently,  without  overwriting each other's entries.  Record
	      structure:

	      - PID of pavuk process
	      - TIME current time
	      - COUNTER in the format current/total number of URLs
	      - STATUS contains the type of the error: FATAL, ERR,
		WARN or OK
	      - ERRCODE is the number code of the error
		(see errcode.h in pavuk sources)
	      - URL of the document
	      - PARENTURL first parent document of this URL
		(when it doesn't have parent - [none])
	      - FILENAME is the name of the local file the
		document is saved under
	      - SIZE size of requested document if known
	      - DOWNLOAD_TIME time which takes downloading of this
		document in format seconds.mili_seconds
	      - HTTPRESP contains the first line of the HTTP server
		response

       -language $str
	      Native language that pavuk should use for communication with its
	      user  (works  only when there is a message catalog for that lan‐
	      guage) GNU gettext support  (for	message	 internationalization)
	      must  also  be  compiled in. Default language is taken from your
	      NLS environment variables.

       -gui_font $font
	      Font used in the GUI interface. To list available	 X  fonts  use
	      the  xlsfonts  command.  This option is available only when com‐
	      piled with GTK+ GUI support.

Netli options
       -[no]read_css
	      Enable or disable fetching objects mentioned in style sheets.

       -[no]verify
	      Enable or disable verifying server CERTS in SSL mode.

       -tlogfile $file
	      Turn on Netli logging with output to specified file.

       -trelative {object | program}
	      Make Netli timings relative to the start of the first object  or
	      the program.

       -transparent_proxy FQDN[:port]
	      When  processing	URL,  send the original, but send it to the IP
	      address at FQDN

       -transparent_ssl_proxy FQDN[:port]
	      When processing HTTPS URL, send the original, but send it to the
	      IP address at FQDN

       -sdemo Output  in  sdemo compatible format. This is only used by sdemo.
	      (For now it simply means output '-1' rather than '*'  when  mea‐
	      surements are invalid.)

       -noencode
	      Do not escape characters that are "unsafe" in URLS.

Special start
       -X     Start  program with X Window interface (if compiled with support
	      for GTK+).  Pavuk as default starts without GUI, and behaves  as
	      regular commandline tool.

       -runX  When  used  together with the -X option, pavuk starts processing
	      of URLs immediately after the GUI window	is  launched.  Without
	      the  -X given, this option doesn't have any effect.  Only avail‐
	      able when compiled with GTK+ support.

       -bg/-nobg
	      This option allows pavuk to detach from its terminal and run  in
	      background mode.	Pavuk will not output any messages to the ter‐
	      minal then. If you want to see messages, you  have  to  use  the
	      -logfile option to specify a file where messages will be  writ‐
	      ten.  By default pavuk executes in the foreground.

       -check_bg/-nocheck_bg
	      Normally, programs sent into the background after being  run  in
	      foreground continue to output messages to the terminal.  If this
	      option is activated, pavuk checks if it is running as background
	      job  and	will  not  write  any messages to the terminal in this
	      case. After it becomes a foreground job  again,  it  will	 start
	      writing  messages to terminal in the normal way.	This option is
	      available only when your system supports retrieving of  terminal
	      info via tc*() functions.

       -prefs/-noprefs
	      When  you	 turn this option on, pavuk will preserve all settings
	      when exiting, and when you run pavuk with GUI  interface	again,
	      all  settings  will be restored.	The settings will be stored in
	      the ~/.pavuk_prefs file. By default pavuk won't restore its options
	      when  started.  This option is available only when compiled with
	      GTK+.

       -schedule $time
	      Execute pavuk at the time specified as parameter. The Format  of
	      the  $time  parameter  is YYYY.MM.DD.hh.mm.  You need a properly
	      configured scheduling with the at command	 on  your  system  for
	      using  this  option.   If	 default  configuration	 (at  -f %f %t
	      %d.%m.%Y) of scheduling command won't work on your  system,  try
	      to adjust it with -sched_cmd option.

       -reschedule $nr
	      Execute  pavuk  periodically  with  $nr  hours period.  You need
	      properly configured scheduling with the at command on your  sys‐
	      tem for using this option.

       -sched_cmd $str
	      Command  to use for scheduling. Pavuk explicitly supports sched‐
	      uling with at.  $str should contain regular characters and macros,
	      escaped by % character.  Supported macros are:
		 %f
		  - for script filename
		 %t
		  - for time (in format HH:MM)
		  - all macros as supported by the strftime() function

       -urls_file $file
	      If  you  use this option, pavuk will read URLs from $file before
	      it starts processing.  In this file, each URL needs to be	 on  a
	      separate	line. After the last URL, a single dot . followed by a
	      LF (line-feed) character denotes the end.	 Pavuk will start pro‐
	      cessing  right after all URLs have been read.  If $file is given
	      as the - character, standard input will be read.

       -store_info/-nostore_info
	      This option causes pavuk to store information about  each	 docu‐
	      ment  into  a  separate  file in the .pavuk_info directory. This
	      file is used to store the original URL from which	 the  document
	      was  downloaded. For files that are downloaded via HTTP or HTTPS
	      protocols, the whole HTTP response header	 is  stored  there.  I
	      recommend	 to  use  this	option when you are using options that
	      change the default layout of the local  document	tree,  because
	      this info file helps pavuk to map the local filename to the URL.
	      This option is also very useful when  different  URLs  have  the
	      same filename in the local tree. When this occurs, pavuk detects
	      this using info files, and it will prefix the  local  name  with
	      numbers.	By default, storing of this extra information is
	      disabled.

       -info_dir $dir
	      With this option you can set the location of a separate directory
	      for storing info files created when the -store_info option is used.
	      This is useful when you don't want to mix the info files with
	      regular document files in the destination directory. The structure
	      of the info files is preserved; they are just stored in a different
	      directory.

       -request $req
	      With this option you can specify extended information for
	      starting	URLs.  With this option you can specify query data for
	      POST or GET.  Current syntax of this option is: URL:["]$url["]
	      [METHOD:["]{GET|POST}["]] [ENCODING:["]{u|m}["]] [FIELD:["]vari‐
	      able=value["]]			 [FILE:["]variable=filename["]]
	      [LNAME:["]local_filename["]]

	      - URL: specifies request URL
	      - METHOD: specifies request method for URL and is
		one of GET or POST.
	      - ENCODING: specifies encoding for request body data.
		  m is for multipart/form-data encoding
		  u is for application/x-www-form-urlencoded
		  encoding
	      - FIELD: specifies field of request data in format
		  variable=value. For encoding of special characters
		  in variable and value you can use same encoding
		  as is used in application/x-www-form-urlencoded
		  encoding.
	      - FILE: specifies special field of query, which is
		  used to specify file for POST based file upload.
	      - LNAME: specifies localname for this request
       When  you  need	to  use	 inside the FIELD: and FILE: fields of request
       specification special characters, you should use the application/x-www-
       form-urlencoded	encoding  of characters. It means all non-ASCII charac‐
       ters, quote character ("), space character  (  ),  ampersand  character
       (&), percent character (%) and equal character (=) should be encoded in
       form %xx where xx is hexadecimal representation of ASCII value of char‐
       acter. So for example % character should be encoded like %25.

       -formdata $req
	      This  option gives you chance to specify contents for HTML forms
	      found during traversing document tree.
	       Current syntax of this option is same as for  -request  option,
	      but  ENCODING: and METHOD: are meaningless in this option seman‐
	      tics.
	       In URL: you have to specify HTML form action URL, which will be
	      matched  against	action URLs found in processed HTML documents.
	      If pavuk finds action URL which matches that supplied in	-form‐
	      data  option, pavuk will construct GET or POST request from data
	      supplied in this option and from default form field values  sup‐
	      plied  in	 HTML  document.  Values supplied on the command line take
	      precedence over those supplied in the HTML file.

       -nthreads $nr
	      By means of this option you  can	specify	 how  many  concurrent
	      threads  will  download documents. By default pavuk executes 3 con‐
	      current downloading threads.  This option is available only when
	      pavuk is compiled to support multithreading.

       -immesg/-noimmesg
	      Pavuk's default behavior when running multiple downloading
	      threads is to buffer all output messages in  memory  buffer  and
	      flush that buffered data just when thread finishes processing of
	      one document. With this option you can change this  behavior  to
	      see  the	messages  immediately  when they are produced. It is only
	      usable when you want to debug some  specials  in	multithreading
	      environment.   This  option is available only when pavuk is com‐
	      piled to support multithreading.

       -dumpfd $nr
	      For scripting it is sometimes useful to be able to download a docu‐
	      ment directly to a pipe or variable instead of storing it to a regu‐
	      lar file. In such case you can use this option to dump data  for
	      example to stdout ($nr = 1).

       -dump_after/-nodump_after
	      While  using  -dumpfd  option  in	 multithreaded	pavuk,	it  is
	      required to dump document in one moment because documents	 down‐
	      loaded in multiple threads can overlap. This option is also use‐
	      ful when you want to dump document  after	 pavuk	adjusts	 links
	      inside HTML documents.

       -dump_response/-nodump_response
	      This  option  has effect only when used with the -dumpfd option. It
	      is used to dump HTTP response headers.

       -dump_urlfd $nr
	      When you use this option, pavuk will output all URLs	 found
	      in  HTML	documents  to  file  descriptor	 $nr. You can use this
	      option to extract and convert all URLs to absolute.

Scenario/Task options
       -scenario $str
	      Name of scenario to load and/or run. Scenarios are files with  a
	      structure similar to the .pavukrc file.  Scenarios contain saved
	      configurations. You can use it for periodical mirroring. Parame‐
	      ters  from  scenarios specified at the command line can be over‐
	      written by command line parameters.  To  be  able	 to  use  this
	      option,  you need to specify scenario base directory with option
	      -scndir.

       -dumpscn $filename
	      Store actual configuration into scenario file with  name	$file‐
	      name.  This is useful to quickly create pre-configured scenarios
	      for manual editing.

Directory options
       -msgcat $dir
	      Directory which contains the message catalog for pavuk.  If  you
	      do  not  have permission to store a pavuk message catalog in the
	      system directory, you should simply create similar structure  of
	      directories in your home directory as it is on your system.

	      For example:

	      Your  native  language  is  German,  and	your home directory is
	      /home/jano.

	      You     should	 at	first	  create     the     directory
	      /home/jano/locales/de/LC_MESSAGES/, then put the German pavuk.mo
	      there and set -msgcat to /home/jano/locales/.  If you have prop‐
	      erly  set locale environment values, you will see pavuk speaking
	      German.  This option is available only when you compiled in sup‐
	      port for GNU gettext messages internationalization.

       -cdir $dir
	      Directory	 where	all retrieved documents are stored. If not
	      specified, the current  directory	 is  used.  If	the  specified
	      directory doesn't exist, it will be created.

       -scndir $dir
	      Directory in which your scenarios are stored.  You must use this
	      option when you are loading or storing scenario files.

Preserve options
       -preserve_time/-nopreserve_time
	      Store downloaded document with same modification time as on  the
	      remote site. Modification time will be set only when such infor‐
	      mation is available (some FTP servers do not  support  the  MDTM
	      command,	and  some documents on HTTP servers are created online
	      so pavuk can't retrieve the modification time of this document).
	      By default the modification time of documents isn't preserved.

       -preserve_perm/-nopreserve_perm
	      Store  downloaded	 document  with the same permissions as on the
	      remote site.  This option has effect  only  when	downloading  a
	      file  through  FTP protocol and assumes that the -ftplist option
	      is used. By default permissions are not preserved.

       -preserve_slinks/-nopreserve_slinks
	      Set symbolic links to point exactly to same location as  on  the
	      remote server; don't do any relocations.	This option has effect
	      only when downloading file through FTP protocol and assumes that
	      the  -ftplist  option  is	 used.	By default symbolic links are not
	      preserved, and are retrieved as regular documents with full con‐
	      tents of linked file.

	      For example, assume that on the FTP server ftp.xx.org there is a
	      symbolic	link  /pub/pavuk/pavuk-current.tgz,  which  points  to
	      /tmp/pub/pavuk-0.9pl11.tgz.   Pavuk  will	 create	 symbolic link
	      ftp/ftp.xx.org_21/pub/pavuk/pavuk-current.tgz
	      if option -preserve_slinks is used, this symbolic link will
	      point to /tmp/pub/pavuk-0.9pl11.tgz
	      if option -preserve_slinks is not used, this symbolic link will
	      point to
	       ../../tmp/pub/pavuk-0.9pl11.tgz

       -retrieve_symlink/-noretrieve_symlink
	      Retrieve files behind symbolic links instead of replicating sym‐
	      links in local tree.

Proxy options
       -http_proxy $site[:$port]
	      If  this	parameter  is  used,  then all HTTP requests are going
	      through this proxy server. This is useful if your	 site  resides
	      behind  a	 firewall,  or	if  you want to use a HTTP proxy cache
	      server. The default port number is 8080.	Pavuk  allows  you  to
	      specify	multiple  HTTP	proxies	 (using	 multiple  -http_proxy
	      options) and it will rotate  proxies  with  roundrobin  priority
	      disabling proxies with errors.

       -nocache/-cache
	      Use  this	 option whenever you want to get the document directly
	      from the site and not from your HTTP proxy cache server. Default
	      pavuk allows transfer of document copies from cache.

       -ftp_proxy $site[:$port]
	      If  this	parameter  is  used,  then  all FTP requests are going
	      through this proxy  server.   This  is  useful  when  your  site
	      resides behind a firewall, or if you want to use FTP proxy cache
	      server.  The default port number is 22.	Pavuk  supports	 three
	      different types of proxies for FTP, see the options -ftp_httpgw,
	      -ftp_dirtyproxy.	If none of the mentioned options is used, then
	      pavuk assumes a regular FTP proxy with USER user@host connecting
	      to remote FTP server.

       -ftp_httpgw/-noftp_httpgw
	      The specified FTP proxy is a HTTP gateway for the FTP  protocol.
	      Default FTP proxy is regular FTP proxy.

       -ftp_dirtyproxy/-noftp_dirtyproxy
	      The specified FTP proxy is a HTTP proxy which supports a CONNECT
	      request (pavuk should use full FTP protocol,  except  of	active
	      data  connections).  Default FTP proxy is regular FTP proxy.  If
	      both   -ftp_dirtyproxy   and    -ftp_httpgw    are    specified,
	      -ftp_dirtyproxy is preferred.

       -gopher_proxy $site[:$port]
	      Gopher gateway or proxy/cache server.

       -gopher_httpgw/-nogopher_httpgw
	      The  specified  Gopher proxy server is a HTTP gateway for Gopher
	      protocol.	 When -gopher_proxy is	set  and  this	-gopher_httpgw
	      option isn't used, pavuk is using proxy as HTTP tunnel with CON‐
	      NECT request to open connections to Gopher servers.

       -ssl_proxy $site[:$port]
	      SSL proxy (tunneling) server [as that in CERN httpd +  patch  or
	      in  Squid]  with enabled CONNECT request (at least on port 443).
	      This option is available only when  compiled  with  SSL  support
	      (you need the SSLeay or OpenSSL libraries with development head‐
	      ers)

Proxy Authentification
       -http_proxy_user $user
	      Username for HTTP proxy authentification.

       -http_proxy_pass $pass
	      Password for HTTP proxy authentification.

       -http_proxy_auth {1/2/3/4/user/Basic/Digest/NTLM}
	      Authentification scheme for proxy access. Similar meaning as the
	      -auth_scheme option (see help for this option for more details).
	      Default is 2 (Basic scheme).

       -auth_proxy_ntlm_domain $str
	      NT or LM domain used for authorization against HTTP proxy server
	      when  NTLM  authentification  scheme is required. This option is
	      available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_proxy_nonce/-noauth_reuse_proxy_nonce
	      When using HTTP Proxy Digest access authentification scheme  use
	      first received nonce value in multiple following requests.

       -ftp_proxy_user $user
	      Username for FTP proxy authentification.

       -ftp_proxy_pass $pass
	      Password for FTP proxy authentification.

Protocol/Download Options
       -ftp_passive
	      Uses passive ftp when downloading via ftp.

       -ftp_active
	      Uses active ftp when downloading via ftp.

       -active_ftp_port_range $min:$max
	      This  option  permits  to specify the ports used for active ftp.
	      This permits easier firewall configuration since	the  range  of
	      ports can be restricted.

	      Pavuk  will  randomly  choose a number from within the specified
	      range until an open port is found. Should no open ports be found
	      within  the  given range, pavuk will default to a normal kernel-
	      assigned port, and a message (debug level net) is output.

	      The port range selected must be in the non-privileged range (eg.
	      greater  than or equal to 1024); it is STRONGLY RECOMMENDED that
	      the chosen range be large enough	to  handle  many  simultaneous
	      active  connections  (for	 example, 49152-65534, the IANA-regis‐
	      tered ephemeral port range).

       -always_mdtm/-noalways_mdtm
	      Force pavuk to always use "MDTM" to determine the file modifica‐
	      tion time and never use cached times determined when listing
	      the remote files.

       -remove_before_store/-noremove_before_store
	      Force unlink'ing of files before new  content  is	 stored	 to  a
	      file.  This is helpful if the local files are hardlinked to some
	      other directory and after mirroring the hardlinks	 are  checked.
	      All "broken" hardlinks indicate a file update.

       -retry $nr
	      Set  the	number	of  attempts  to  transfer processed document.
	      Default set to 1, this means pavuk will retry once to get docu‐
	      ments which failed on first attempt.

       -nregets $nr
	      Set  the	number of allowed regets on a single document, after a
	      broken transfer.	Default value for this option is 2.

       -nredirs $nr
	      Set number of allowed HTTP redirects. (use this  for  prevention
	      of  loops)  Default  value  for this option is 5, and conform to
	      HTTP specification.

       -force_reget/-noforce_reget
	      Force reget'ing of the whole document after  a  broken  transfer
	      when  the	 server doesn't support retrieving of partial content.
	      Pavuk default behavior is to stop getting documents which	 don't
	      allow restarting of transfer from specified position.

       -timeout $nr
	      Timeout  for  stalled connections in minutes. This value is also
	      used for connection timeouts. For sub-minute  timeouts  you  can
	      use floating point numbers.  Default timeout is 0, and that means
	      timeout checking is disabled.

       -noRobots/-Robots
	      This switch suppresses the use of the robots.txt standard, which
	      is  used	to  restrict access of Web robots to some locations on
	      the web server. Default is allowed checking of robots.txt	 files
	      on  HTTP	servers.  Enable this option always when you are down‐
	      loading huge sets of pages with unpredictable layout.  This pre‐
	      vents you from upsetting server administrators :-).

       -noEnc/-Enc
	      This  switch  suppresses	using  of  gzip or compress or deflate
	      encoding in transfer. I don't know if some servers are broken or
	      what,  but  they are propagating that MIME type application/gzip
	      or application/compress as encoded. Turn this option  off,  when
	      you don't have libz support compiled in and also gzip program
	      which is used to decode document encoded this way.   At  default
	      is decoding of downloaded document disabled.

       -check_size/-nocheck_size
	      The  option  -nocheck_size  should  be used if you are trying to
	      download pages from a HTTP server which sends a  wrong  Content-
	      Length:  field  in  the  MIME header of response.	 Default pavuk
	      behavior is to check this field and complain when	 something  is
	      wrong.

       -maxrate $nr
	      If  you don't want to give all your transfer bandwidth to pavuk,
	      use this option to  set  pavuk's	maximum	 transfer  rate.  This
	      option  accepts  a floating point number to specify the transfer
	      rate in kB/s. If you want get optimal settings, you also have to
	      play  with the size of the read buffer (option -bufsize) because
	      pavuk is doing flow  control  only  at  application  level.   At
	      default pavuk use full bandwidth.

       -minrate $nr
	      If you hate slow transfer rates, this option allows you to break
	      transfers with slow speed. You  can  set	the  minimum  transfer
	      rate, and if the connection gets slower than the given rate, the
	      transfer will be stopped. The minimum transfer rate is given  in
	      kB/s.  At default pavuk doesn't check this limit.

       -bufsize $nr
	      This  option  is	used  to  specify  the size of the read buffer
	      (default size: 32kB).  If you have a very fast  connection,  you
	      may increase the size of the buffer to get a better read perfor‐
	      mance. If you need to decrease the transfer rate, you  may  need
	      to  decrease the size of the buffer and set the maximum transfer
	      rate with the -maxrate option. This option accepts the  size  of
	      the buffer in kB.

       -fs_quota $nr
	      If  you are running pavuk on a multiuser system, you may need to
	      avoid filling up your file system. This option lets you  specify
	      how much space must remain free. If pavuk detects an underrun of
	      the free space, it will stop downloading files. Specify this
	      quota in kB. Default value is 0, and that means no checking of
	      this quota.

       -file_quota $nr
	      This option is useful when you want to limit downloading of  big
	      files,  but  want	 to  download  at least $nr kilobytes from big
	      files.  A big file will be transferred, and when it reaches  the
	      specified	 size, transfer will break. Such document will be pro‐
	      cessed as properly downloaded, so be  careful  when  using  this
	      option.	At  default  pavuk  is transferring full size of docu‐
	      ments.

       -trans_quota $nr
	      If you are aware that your selection should address a big amount
	      of  data,	 you can use this option to limit the amount of trans‐
	      ferred data.  Default is by size unlimited transfer.

       -max_time $nr
	      Set maximum amount of  time  for	program	 run.  After  time  is
	      exceeded, pavuk will stop downloading. Time is specified in min‐
	      utes. Default value is 0, and it means downloading time  is  not
	      limited.

       -url_strategy $strategy
	      This  option  allows you to specify a downloading order for URLs
	      in document tree.	 This option accepts the following strings  as
	      parameters :

	      level - will order URLs as it loads it from HTML files (default)
	      leveli - as previous, but inline objects URLs come first
	      pre  -  will  insert  URLs  from	actual HTML document at start,
	      before other
	      prei - as previous, but inline objects URLs come first

       -send_if_range/-nosend_if_range
	      Send If-Range: header in HTTP request. I found  out,  that  some
	      HTTP  servers  (greetings,  MS  :-)) are sending different ETag:
	      fields in different responses for the same, unchanged  document.
	      This  causes  problems  when  pavuk attempts to reget a document
	      from such a server: pavuk will remember the old ETag  value  and
	      uses it in following requests for this document.  If the server
	      checks it with the new ETag value and it differs, it will refuse
	      to  send	only part of the document, and start the download from
	      scratch.

       -ssl_version $v
	      Set required SSL protocol version for SSL communication.	$v  is
	      one of ssl2, ssl23, ssl3 or tls1.	 This option is available only
	      when compiled with SSL support.  Default is ssl23.

       -unique_sslid/-nounique_sslid
	      This option can be used if you want to use a unique SSL  ID  for
	      all  SSL	sessions.  Default pavuk behavior is to negotiate each
	      time new session ID for each connection.	This option is	avail‐
	      able only when compiled with SSL support.

       -use_http11/-nouse_http11
	      This option is used to switch between HTTP/1.0 and HTTP/1.1 pro‐
	      tocol used with HTTP servers. Currently HTTP/1.1 is not the
	      default because its implementation is very fresh and not 100%
	      tested. Even so, using HTTP/1.1 is highly recommended, because
	      it is faster than HTTP/1.0 and uses less network bandwidth for
	      initiating connections. In a future version I will make HTTP/1.1
	      the default.

       -local_ip $addr
	      You  can	use this option when you want to use specified network
	      interface for communication with other  hosts.  This  option  is
	      suitable	for  multihomed hosts with several network interfaces.
	      Address should be entered as regular IP address or as host name.

       -identity $str
	      This option allows you to specify content of  User-Agent:	 field
	      of  HTTP request.	 This is usable, when scripts on remote server
	      returns different document on same URL for  different  browsers,
	      or  if  some HTTP server refuse to serve document for Web robots
	      like pavuk. Default pavuk sends in User-Agent: field pavuk/$VER‐
	      SION string.

       -auto_referer/-noauto_referer
	      This option forces pavuk to send HTTP Referer: header field with
	      starting URLs.  Content of this field will be  self  URL.	 Using
	      this  option is required, when remote server checks the Referer:
	      field.  At default pavuk won't send Referer: field with starting
	      URLs.

       -referer/-noreferer
	      This  option  allows  to	enable and disable the transmission of
	      HTTP Referer: header field.  At  default	pavuk  sends  Referer:
	      field.

       -httpad $str
	      In  some	cases  you  may	 want  to  add	user defined fields to
	      HTTP/HTTPS requests.  This option is exactly for	this  purpose.
	      In  $str	you can directly specify content of additional header.
	      If you specify only raw header, it will be used only for	start‐
	      ing requests. When you want to use this header with each request
	      while crawling, prefix the header with + character.

       -del_after/-nodel_after
	      This option allows you to delete FILES from REMOTE server,  when
	      download is properly finished. At default is this option off.

       -FTPlist/-noFTPlist
	      When  option  -FTPlist will be used, pavuk will retrieve content
	      of FTP directories with FTP command LIST instead of NLST. So the
	      same  listing  will  be  retrieved as with "ls -l" UNIX command.
	      This option is required if you need to preserve  permissions  of
	      remote files or you need to preserve symbolic links.  Pavuk sup‐
	      ports wide listing on FTP servers with regular BSD or SYSV style
	      "ls -l" directory listing, on FTP servers with EPFL listing for‐
	      mat, VMS style listing, DOS/Windows style listing and Novell
	      listing format.  Default pavuk behavior is to use NLST for FTP
	      directory listings.

       -ftp_list_options $str
	      Some FTP servers require to supply extra options to LIST or NLST
	      FTP  commands to show all files and directories properly. But be
	      sure not to use any extra options which can reformat  output  of
	      the  listing.  Useful  is	 especially  -a option which force FTP
	      server to show also dot files and directories  and  with	broken
	      WuFTP  servers  it also helps to produce full directory listings
	      not just files.

       -fix_wuftpd/-nofix_wuftpd
	      This option is the result of several attempts to get the
	      -remove_old option working properly with WuFTPd server when
	      -ftplist option is used. The problem is that FTP command LIST on
	      WuFTPd doesn't mind when trying to list nonexisting directory, and indi‐
	      cates success in FTP response  code.   When  you	activate  this
	      option,  pavuk  uses  extra  FTP	command (STAT -d dir) to check
	      whether the directory really exists. Don't use this option until
	      you are sure that you really need it!

Authentification
       -auth_file $file
	      File  where  you	have  stored  authentification information for
	      access to some service. For file structure see  below  in	 FILES
	      section.

       -auth_name $user
	      If  you are using this parameter, program is doing authentifica‐
	      tion with each HTTP access to document. Use  this	 only  if  you
	      know  that  only one HTTP server could be accessed or use -asite
	      option to specify site to which you use  authentification.  Else
	      your auth parameters will be sent to each accessed HTTP server.

       -auth_passwd $passwd
	      Value of this parameter is used as password for authentification

       -auth_scheme {1/2/3/4/user/Basic/Digest/NTLM}
	      This parameter specifies used authentification scheme.
	      1	 or user means user authentification scheme is used as defined
	      in HTTP/1.0 or HTTP/1.1.	Password and user name are sent	 unen‐
	      coded.
	      2	 or  Basic  means  Basic  authentification  scheme  is used as
	      defined in HTTP/1.0.  Password and user  name  are  sent	BASE64
	      encoded.
	      3 or Digest means Digest access authentification scheme based on
	      MD5 checksums as defined in RFC2069.
	      4 or NTLM means NTLM proprietary access authentification	scheme
	      used  by	Microsoft  IIS	or  Proxy  servers.  When you use this
	      scheme, you must also  specify  NT  or  LM  domain  with	option
	      -auth_ntlm_domain.  This	scheme is supported only when compiled
	      with OpenSSL or libdes libraries.

       -auth_ntlm_domain $str
	      NT or LM domain used for authorization against HTTP server when
	      NTLM  authentification scheme is required. This option is avail‐
	      able only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_nonce/-noauth_reuse_nonce
	      While using HTTP Digest access authentification scheme use first
	      received	nonce value in more following requests.	 Default pavuk
	      negotiates nonce for each request.

       -ssl_key_file $file
	      File with public key for SSL certificate (learn more from SSLeay
	      or  OpenSSL  documentation)  This	 option is available only when
	      compiled with SSL support (you need SSleay or OpenSSL  libraries
	      and development headers)

       -ssl_cert_file $file
	      Certificate  file	 in  PEM  format  (learn  more	from SSLeay or
	      OpenSSL documentation) This option is available only  when  com‐
	      piled with SSL support (you need SSleay or OpenSSL libraries and
	      development headers)

       -ssl_cer_passwd $str
	      Password used to generate certificate (learn more from SSLeay or
	      OpenSSL  documentation)  This option is available only when com‐
	      piled with SSL support (you need SSLeay or OpenSSL libraries and
	      development headers)

       -nss_cert_dir $dir
	      Config  directory for NSS (Netscape SSL implementation) certifi‐
	      cates.  Usually  ~/.netscape  (created  by  Netscape  communica‐
	      tor/navigator) or profile directory below ~/.mozilla (created by
	      Mozilla browser). The  directory	should	contain	 cert7.db  and
	      key3.db  files.  If you don't use Mozilla nor Netscape, you must
	      create these files by utilities distributed with NSS libraries.
	      Pavuk  opens certificate database only readonly.	This option is
	      available only when pavuk is compiled with SSL support  provided
	      by Netscape NSS SSL implementation.

       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]
	      By default pavuk will reject connection to SSL server whose cer‐
	      tificate is not stored in local  certificate  database  (set  by
	      -nss_cert_dir option).  You must explicitly force pavuk to allow
	      connection to servers with unknown certificates.	This option is
	      available	 only when pavuk is compiled with SSL support provided
	      by Netscape NSS SSL implementation.

       [-nss_domestic_policy/-nss_export_policy]
	      Selects sets of ciphers allowed/disabled by  USA	export	rules.
	      This  option  is	available only when pavuk is compiled with SSL
	      support provided by Netscape NSS SSL implementation.

       -from $email
	      This parameter is used when accessing anonymous  FTP  server  as
	      password	or  is	optionally  inserted  into  From field in HTTP
	      request. If not specified pavuk discovers this from  USER	 envi‐
	      ronment variable and from site hostname.

       -send_from/-nosend_from
	      This  option  is	used for enabling or disabling sending of user
	      identification, entered in -from option, as FTP  anonymous  user
	      password	and  From:  field of HTTP request.  As default is this
	      option off.

       -ftp_login_handshake $host $handshake
	      When you need to use nonstandard login procedure for some of FTP
	      servers,	you  can use this option to change default pavuk login
	      procedure. To allow more flexibility, you can assign  the	 login
	      procedure	 to  some server or to all. When $host is specified as
	      empty string (""), then attached login procedure is assigned to
	      all  FTP	servers besides those having assigned own login proce‐
	      dures. In the $handshake parameter you can specify  exact	 login
	      procedure	 specified  by	FTP  commands followed by expected FTP
	      response codes delimited with backslash (\) characters.
	      For example this is default login procedure when logging in reg‐
	      ular  ftp	 server	 without  going	 through  proxy	 server : USER
	      %u\331\PASS %p\230. There	 are  two  commands  followed  by  two
	      response	codes.	After  USER command pavuk expects FTP response
	      code 331 and after PASS command pavuk expects  from  server  FTP
	      response	code 230. In ftp commands you can use following macros
	      which will be replaced by respective values:

	       %u - user name used to access FTP server
	       %p - password used to access FTP server
	       %U - user name used to access FTP proxy server
	       %P - password used to access FTP proxy server
	       %h - hostname of FTP server
	       %s - port number on which FTP server listens

Site/Domain/Port Limitation Options
       -asite $list
	      Specify comma separated list of allowed sites  on	 which	refer‐
	      enced documents are stored.

       -dsite $list
	      Specify  comma  separated	 list  of  disallowed sites.  Previous
	      parameter is opposite to this one. If both  are  used  the  last
	      occurrence of them is used to be valid.

       -adomain $list
	      Specify  comma separated list of allowed domains on which refer‐
	      enced documents are stored.

       -ddomain $list
	      Specify comma separated list  of	disallowed  domains.  Previous
	      parameter	 is  opposite  to  this one. If both are used the last
	      occurrence of them is used to be valid.

       -aport $list
	      In $list, you can write comma separated list of ports from which
	      you allow to download documents.

       -dport $list
	      This option is opposite option to previous option. It is used to
	      specify denied ports. If both -aport and -dport options are used
	      the  last	 occurrence  of them is used to be valid and all other
	      occurrences will be omitted.

Limitation Document properties
       -amimet $list
	      List of comma separated allowed MIME types.  You	can  use  with
	      this option also wildcard patterns.

       -dmimet $list
	      List  of comma separated disallowed MIME types. You can use with
	      this option also wildcard patterns.  Previous parameter is oppo‐
	      site  to	this one. If both are used the last occurrence of them
	      is used to be valid.

       -maxsize $nr
	      Maximum allowed size of document.	 This option is	 applied  only
	      when pavuk is able to detect the size of the document before starting the
	      transfer.	 Default value is 0, and it  means  this  limit	 isn't
	      applied.

       -minsize $nr
	      Minimal allowed size of document.  This option is applied only
	      when pavuk is able to detect the size of the document before starting the
	      transfer.	  Default  value  is  0, and it means this limit isn't
	      applied.

       -newer_than $time
	      Allow only transfer of documents with  modification  time	 newer
	      than   specified	 in  parameter	$time.	Format	of  $time  is:
	      YYYY.MM.DD.hh:mm.	 To apply this option pavuk must  be  able  to
	      detect modification time of document.

       -older_than $time
	      Allow  only  transfer  of documents with modification time older
	      than  specified  in  parameter  $time.  Format  of   $time   is:
	      YYYY.MM.DD.hh:mm.	  To  apply  this option pavuk must be able to
	      detect modification time of document.

       -noCGI/-CGI
	      this switch prevents to transfer dynamically generated  paramet‐
	      ric  documents  through  CGI  interface.	This  is detected with
	      occurrence of ? character inside URL.  Default pavuk behavior is
	      to allow transfer of URLs with query strings.

       -alang $list
	      this  allows you to specify ordered comma separated list of pre‐
	      ferred natural languages. This option work only  with  HTTP  and
	      HTTPS protocol using Accept-Language: MIME field.

       -acharset $list
	      This option allows you to enter comma separated list of pre‐
	      ferred encoding of transferred documents. This works only with
	      HTTP  and	 HTTPS	urls  and  only if such document encodings are
	      located on destination server.
	      example: -acharset iso-8859-2,windows-1250,utf8

Limitation Document name
       -asfx $list
	      This parameter allows you to specify set	of  suffixes  used  to
	      restrict selection of documents which will be processed.

       -dsfx $list
	      Set  of  suffixes that are used to specify restriction on selec‐
	      tion of documents.  This one is inverse to previous option. They
	      are segregating each other.

       -aprefix $list, -dprefix $list
	      These two options allow you to specify set of allowed or disal‐
	      lowed prefixes of documents. They are segregating each other.

       -pattern $pattern
	      This option allows you to specify	 wildcard  pattern  for	 docu‐
	      ments. All documents are tested if they match this pattern.

       -rpattern $reg_exp
	      This  is equal option as previous, but this uses regular expres‐
	      sions.  Available only on platforms which have any supported  RE
	      implementation.

       -skip_pattern $pattern
	      This option allows you to specify wildcard pattern for documents
	      that should be skipped.  All documents are tested if they	 match
	      this pattern.

       -skip_rpattern $reg_exp
	      This  is equal option as previous, but this uses regular expres‐
	      sions.  Available only on platforms which have any supported  RE
	      implementation.

       -url_pattern $pattern
	      This option allows you to specify wildcard pattern for URLs. All
	      URLs are tested if they match this pattern.
	      Example:
	      -url_pattern  http://\*.idata.sk:\*/~ondrej/\*  .	 this	option
	      enables  all  HTTP URLs from domain .idata.sk on all ports which
	      are located under /~ondrej/.

       -url_rpattern $reg_exp
	      This is equal option as previous, but this uses regular  expres‐
	      sions.   Available only on platforms which have any supported RE
	      implementation.

       -skip_url_pattern $pattern
	      This option allows you to specify wildcard pattern for URLs that
	      should  be skipped.  All URLs are tested if they match this pat‐
	      tern.

       -skip_url_rpattern $reg_exp
	      This is equal option as previous, but this uses regular  expres‐
	      sions.   Available only on platforms which have any supported RE
	      implementation.

       -aip_pattern $re
	      This option allows you to limit set of transferred documents  by
	      server  IP  address.   IP	 address  can  be specified as regular
	      expressions, so it is possible to specify set of IP addresses by
	      one expression.  Available only on platforms which have any sup‐
	      ported RE implementation.

       -dip_pattern $re
	      This option similar to previous option, but is used  to  specify
	      set  of  disallowed  IP  addresses.  Available only on platforms
	      which have any supported RE implementation.

       -tag_pattern $tag $attrib $url
	      More powerful version of -url_pattern option  for	 more  precise
	      matching	of  allowed  URLs based on HTML tag name pattern, HTML
	      tag attribute name pattern and on URL pattern. You  can  use  in
	      all  three  parameters  of  this	option wildcard patterns, thus
	      something like -tag_pattern '*'  '*'  url_pattern	 is  equal  to
	      -url_pattern  url_pattern.  The  $tag and $attrib parameters are
	      always matched against uppercase strings. For example if you want
	      just   let   pavuk   follow  only	 regular  links	 ignoring  any
	      stylesheets, images, etc., use option -tag_pattern A HREF '*'.

       -tag_rpattern $tag $attrib $url
	      This is variation on the -tag_pattern. It uses  regular  expres‐
	      sion patterns in parameters instead of wildcard patterns used in
	      the previous option.

Limitation Protocol Option
       -noHTTP/-HTTP
	      This switch suppresses  all  transfers  through  HTTP  protocol.
	      Default is transfer through HTTP enabled.

       -noSSL/-SSL
	      This  switch  suppresses	all  transfers	through HTTPS protocol
	      (HTTP protocol over SSL).  Default is transfer through HTTPS
	      enabled.  This option is available only when compiled with SSL
	      support (you need SSLeay or OpenSSL libraries and development
	      headers)

       -noGopher/-Gopher
	      Suppress	 all   transfers  through  Gopher  Internet  protocol.
	      Default is transfer through Gopher enabled.

       -noFTP/-FTP
	      This switch prevents processing documents allocated on  all  FTP
	      servers.  Default is transfer through FTP enabled.

       -noFTPS/-FTPS
	      This  switch  prevents processing documents allocated on all FTP
	      servers accessed through SSL.  Default is transfer through FTPS
	      enabled.  This option is available only when compiled with SSL
	      support (you need SSLeay or OpenSSL libraries and development
	      headers)

       -FTPhtml/-noFTPhtml
	      By  using of option -FTPhtml you can force pavuk to process HTML
	      files downloaded with FTP	 protocol.   At	 default  pavuk	 won't
	      parse HTML files from FTP servers.

       -FTPdir/-noFTPdir
	      Force  recursive	processing of FTP directories too.  At default
	      is recursive downloading from FTP servers denied.

       -disable_html_tag $TAG,[$ATTRIB][;...]
	      -enable_html_tag $TAG,[$ATTRIB][;...]  Enable  or	 disable  pro‐
	      cessing  of  particular HTML tags or attributes.	At default all
	      supported HTML tags are enabled.

	      For example if you don't want to process all images  you	should
	      use option -disable_html_tag 'IMG,SRC;INPUT,SRC;BODY,BACKGROUND'
	      .

Other Limitation Options
       -subdir $dir
	      Subdirectory of local tree directory, to limit some of the modes
	      {sync, resumeregets, linkupdate} in its tree scan.

       -dont_leave_site/-leave_site
	      (Don't) leave starting site. At default pavuk can span host when
	      recursing through WWW tree.

       -dont_leave_dir/-leave_dir
	      (Don't) leave starting directory. If -dont_leave_dir  option  is
	      used  pavuk  will stay only in starting directory (including its
	      own subdirectories).  At default pavuk can leave starting direc‐
	      tories.

       -leave_site_enter_dir/-dont_leave_site_enter_dir
	      If  you are downloading WWW tree which spans multiple hosts with
	      huge trees, you may want to allow downloading of document	 which
	      are  in  directory hierarchy below directory which we visited as
	      first   on   each	  site.	  To   obtain	this,	 use	option
	      -dont_leave_site_enter_dir.  As  default	pavuk  will go also to
	      higher directory levels on that site.

       -lmax $nr
	      Set maximum allowed level of tree traverse. Default is set to 0,
	      what means that pavuk can traverse ad infinitum.  As of version
	      0.8pl1 inline objects of HTML pages are placed at same level  as
	      parent HTML page.

       -leave_level $nr
	      Maximum  level  of  documents outside from site of starting URL.
	      Default is set to 0, and 0 means that checking is not applied.

       -site_level $nr
	      Maximum level of	sites  outside	from  site  of	starting  URL.
	      Default is set to 0, and 0 means that checking is not applied.

       -dmax $nr
	      Set  maximum  allowed  number  of	 documents that are processed.
	      Default value is 0.  That means no restrictions are used in num‐
	      ber of processed documents.

       -singlepage/-nosinglepage
	      Using  option -singlepage allows you to transfer just HTML pages
	      with all its inlined objects (pictures, sounds, frame documents,
	      ...).   As default is disabled single page transfer. This option
	      makes -mode singlepage option obsolete.

       -limit_inlines/-dont_limit_inlines
	      With this option you can control whether limiting options	 apply
	      also  to	inline objects (pictures, sounds, ...). This is useful
	      when you want to download specified set of HTML pages  with  all
	      inline objects without any restrictions.

       -user_condition $str
	      Script  or program name for users own conditions.	 You can write
	      any script which should with exit value decide if	 download  URL
	      or not.  Script gets from pavuk any number of options, with this
	      meaning :

		 -url $url - processed URL
		 -parent $url - any number of parent URLs
		 -level $nr - level of this URL from starting URL
		 -size $nr - size of requested URL
		 -date $datenr - modification time of requested URL in	format
		 YYYYMMDDhhmmss

	      The  exit	 status	 0 of script or program means that current URL
	      should be rejected and nonzero exit status means that URL should
	      be accepted.
	      Warning  :  use  user conditions only if required because of big
	      slowdowns caused by forking scripts for each checked URL.

       -follow_cmd $str
	      This option allows you to specify script or program which can by
	      its  exit status decide whether to follow URLs from current HTML
	      document. This script will be called after download of each HTML
	      document.  The script will get following options as its parame‐
	      ters:

		 -url $url - URL of current HTML document
		 -infile $file - local file where is stored HTML document

	      The exit status 0 of script or program means that URLs from cur‐
	      rent  document will be disallowed, other exit status means, that
	      pavuk can follow links from current HTML document.

Javascript support
       Support for scripting languages like JavaScript or VBScript in pavuk is
       done in a somewhat hacky way. There is no interpreter for these
       languages, so not all things will work. The whole support which pavuk
       has for these scripting languages is based on regular expression
       patterns specified by the user. Pavuk searches for these patterns in
       DOM event attributes of HTML tags, in javascript:... URLs, in inline
       scripts in HTML documents enclosed between <script></script> tags and
       in separate javascript files. Support for scripting languages is only
       available when pavuk is compiled with a proper regular expression
       library (POSIX/GNU/PCRE).

       -enable_js/-disable_js
	      These options are used to enable or disable processing of
	      Javascript  parts of HTML documents. You must enable this option
	      to be able to use processing of javascript patterns.

       -js_pattern $re
	      With this option you are specifying what patterns	 match	inter‐
	      ested  parts  of	Javascript  for extracting URLs. The parameter
	      must be RE pattern  with	exactly	 one  subpattern  which	 match
	      exactly the URL part. For example to match URL in following type
	      of javascript expressions :
		document.b1.src='pics/button1_pre.jpg'
	      you can use this pattern
		"^document.[a-zA-Z0-9_]*.src[ ]*=[ ]*'(.*)'$"

       -js_transform $p $t $h $a
	      This option is similar to	 previous,  but	 you  can  use	custom
	      transform	 rules	for the URL parts of patterns and also specify
	      the exact HTML tag and attribute where to look for this pattern.
	      The  $p  is  the pattern to match the interested part of script.
	      The $t is transform rule for the URL, in this parameter  the  $x
	      parts will be replaced by x-th subpattern of the $p pattern. The
	      $h parameter is exact  HTML  tag	or  "*"	 when  this  apply  to
	      javascript:  URLs or DOM event attribs or "" (empty string) when
	      this apply to javascript body of HTML document  or  separate  JS
	      file.  The $a parameter is exact HTML attrib of tag or "" (empty
	      string) when this rule apply to javascript body.

       -js_transform2 $p $t $h $a
	      This option is very similar to  previous.	 The  meaning  of  all
	      parameters  is  same, just the pattern $p can have only one sub‐
	      string which will be used in the	transform  rule	 $t.  This  is
	      required	to  allow  rewriting  of  URL  parts  of  the tags and
	      scripts. This option can also be used to force pavuk  to	recog‐
	      nize HTML tag/attribute pairs which pavuk does not support.

Cookie
       -cookie_file $file
	      File  where  are	stored cookie infos. This file must be in Net‐
	      scape cookie file format (generated with Netscape	 Navigator  or
	      Communicator ...).

       -cookie_send/-nocookie_send
	      Use  collected  cookies  in HTTP/HTTPS requests.	Pavuk will not
	      send at default cookies.

       -cookie_recv/-nocookie_recv
	      Store received cookies from  HTTP/HTTPS  responses  into	memory
	      cookie cache.  At default pavuk will not remember received cook‐
	      ies.

       -cookie_update/-nocookie_update
	      Update cookie file on disk and synchronize it with changes  made
	      by  any  concurrent processes.  At default pavuk will not update
	      cookie file on disk.

       -cookies_max $nr
	      Maximum number of cookies in memory cookie cache.	 Default value
	      is 0, and that means no restrictions for cookies number.

       -disabled_cookie_domains $list
	      Comma-separated list of cookie domains which are not permitted to
	      send cookies stored into cookie cache.

       -cookie_check/-nocookie_check
	      Check when receiving cookie, if cookie domain is equal to domain
	      of server which sends this cookie. By default pavuk checks if
	      server is setting cookies for its domain, and if it tries to set
	      cookie for foreign domain pavuk will complain about that and
	      will reject such cookie.

HTML rewriting engine tuning options
       -noRelocate/-Relocate
	      This switch prevents the program from rewriting relative URLs to
	      absolute, after HTML document is transferred.  Default pavuk
	      behavior is to maintain link consistence of HTML	documents.  So
	      always  when  HTML document is downloaded pavuk will rewrite all
	      URLs to point to local document if it is available and if it  is
	      not  available  it will point to remote document. After document
	      is properly downloaded, pavuk will update links  in  HTML	 docu‐
	      ments, which point to this one.

       -all_to_local/-noall_to_local
	      This option forces pavuk to change all URLs inside HTML document
	      to local URLs immediately after download of document. Default is
	      this option disabled.

       -sel_to_local/-nosel_to_local
	      This  option  forces  pavuk to change all URLs, which accomplish
	      conditions for download, to local inside HTML  document  immedi‐
	      ately  after  download  of  document.   I	 recommend to use this
	      option, when you are sure, that transfer	will  be  without  any
	      problems. This option can save a lot of processor time.  Default
	      is this option disabled.

       -all_to_remote/-noall_to_remote
	      This option forces pavuk to change all URLs inside HTML document
	      to  remote URLs immediately after download of document.  Default
	      is this option disabled.

       -post_update/-nopost_update
	      This option is especially designed to allow in  -fnrules	option
	      doing  rules  based on MIME type of document. This option forces
	      pavuk to generate local names for	 documents  just  after	 pavuk
	      knows what is the MIME type of document. This have big impact on
	      the rewriting engine of links inside HTML documents. This option
	      causes dysfunction of other options for controlling the link
	      rewriting engine. Use this option only when you  know  what  you
	      are doing :-)

       -dont_touch_url_pattern $pat
	      This option serves to deny rewriting and processing of particu‐
	      lar URLs in HTML documents by pavuk HTML rewriting engine.  This
	      option  accepts wildcard patterns to specify such URLs. Matching
	      is done against untouched URLs so when the URL is relative, you
	      must  use	 pattern  which	 matches  the relative URL, when it is
	      absolute, you must use absolute URL.

       -dont_touch_url_rpattern $pat
	      This option is variation on previous option. This one uses regu‐
	      lar  patterns  for matching of URLs instead of wildcard patterns
	      used by -dont_touch_url_pattern option. This option is available
	      only  when pavuk is compiled with support for regular expression
	      patterns.

       -dont_touch_tag_rpattern $pat
	      This option is variation on previous option,  just  matching  is
	      made on full HTML tag with included <>. This option accepts reg‐
	      ular expression patterns. It is available	 only  when  pavuk  is
	      compiled with support for regular expression patterns.

Filename/URL Conversion Option
       -tr_del_chr $str
	      All  characters found in $str will be deleted from local name of
	      document.	 $str should contain escape sequences similar like  in
	      tr command:
	      \n - newline
	      \r - carriage return
	      \t - horizontal tab space
	      \0xXX - hexadecimal  ASCII value
	      [:upper:] - all uppercase letters
	      [:lower:] - all lowercase letters
	      [:alpha:] - all letters
	      [:alnum:] - all letters and digits
	      [:digit:] - all digits
	      [:xdigit:] - all hexadecimal digits
	      [:space:] - all horizontal and vertical whitespace
	      [:blank:] - all horizontal whitespace
	      [:cntrl:] - all control characters
	      [:print:] - all printable characters including space
	      [:nprint:] - all non printable characters
	      [:punct:] - all punctuation characters
	      [:graph:] - all printable characters excluding space

       -tr_str_str $str1 $str2
	      String  $str1  from local name of document will be replaced with
	      $str2.

       -tr_chr_chr $chrset1 $chrset2
	      Characters from $chrset1 from local name	of  document  will  be
	      replaced with corresponding character from $chrset2.  $chrset1
	      and $chrset2 should have same syntax as $str in -tr_del_chr
	      option.

       -store_name $str
	      When  you want to change local filename of first file downloaded
	      with singlepage mode, you should use this option.

       -index_name $str
	      With this option you can change directory index name. As default
	      is used _._.html .

       -store_index/-nostore_index
	      With option -nostore_index you can deny storing of directory
	      indexes into HTML files.

       -fnrules $t $m $r
	      This is a very powerful option! This option is used to  flexible
	      change  layout  of local document tree. It accepts three parame‐
	      ters. First parameter $t is used to say what type	 is  following
	      pattern.	 F is used for wildcard pattern (uses fnmatch()) and R
	      is used for regular expression pattern (using any	 supported  RE
	      implementation).	 Second	 parameter is matching pattern used to
	      select URLs for this rule.  If  URL  match  this	pattern,  then
	      local  name  for	this  URL is computed following rules of third
	      parameter.  And third parameter is  local	 name  building	 rule.
	      Pavuk  now  supports two kinds of local name building rules. One
	      is simple based only on simple macros and other more complicated
	      extended	rule, which also enables to perform several functions.
	      Recognition between those two kinds of rules is done by  looking
	      at  first	 character  of	rule.  In case when first character is
	      '(', rule is extended and in all other cases it  is  the	simple
	      kind of rule.

	      Simple  rule  should contain literals or escaped macros.	Macros
	      are escaped by % character or by $ character.

	      Here is list of recognized macros:

	      $x - where x is any positive number. This macro is replaced with
	      x-th  substring matched by RE pattern. (If you use this you need
	      to understand RE !)
	      %i - is replaced with protocol id (http, https, ftp, gopher)
	      %p - is replaced with password. (use this only when usable)
	      %u - is replaced with username.
	      %h - is replaced with host name.
	      %m - is replaced with domain name.
	      %r - is replaced with port number.
	      %d - is replaced with path to document.
	      %n - is replaced with document name.
	      %b - is replaced with basename of document (without extension).
	      %e - is replaced with extension.
	      %s - is replaced with searchstring.
	      %M - is replaced with MIME type of document. When you are	 using
	      this  macro,  you	 *must*	 use  also -post_update option else it
	      won't work.
	      %E - is replaced with default extension assigned to MIME type of
	      document.	 When  you  are	 using this macro, you *must* use also
	      -post_update option else it won't work.
	      %x - where x is positive number. This macro is replaced with  x-
	      th directory from path to document from beginning.
	      %-x - where x is positive number. This macro is replaced with x-
	      th directory from path to document from end.

	      Here is example. If you want place document into single directo‐
	      ries by extension, you should use following fnrules option:
	      -fnrules F '*' '/%e/%n'

	      Extended rule always begins with character '('. It uses some kind
	      of LISP like syntax.

	      Here are base rules for writing extended rules :
	      - the local filename for a rule of this kind is the return value
	      of the function
	      - each function is enclosed inside round braces ()
	      - first token right after opening brace is function name
	      - each function have nonzero fixed number of parameters
	      - each function returns numeric or string value
	      - function parameters are separated by any number of space char‐
	      acters
	      - parameter of function should be string, number, macro or other
	      function
	      - string is ever quoted with "
	      -	 each  numeric	parameter  can be in any encoding supported by
	      strtod() function (octal, decimal, hexadecimal, ...)
	      - there is no implicit conversion from number to string
	      - each macro is prefixed by % character  and  is	one  character
	      long
	      -	 each macro is replaced by its string representation from cur‐
	      rent URL
	      - function parameters are typed strictly
	      - toplevel function must return string value

	      Extended rule supports full set of %  escaped  macros  supported
	      with simple rules, plus two following addition macros :
	      %U - URL string
	      %o - default localname for URL

	      Here is description of all supported functions

	      sc - concat two string parameters
		 - accepts two string parameters
		 - returns string value
	      ss - substring from string
		 - accepts three parameters.
		   - first is string from which we want to cut subpart
		   -  second  is  number which represents starting position in
	      string
		   - third is  number  which  represents  ending  position  in
	      string
		 - returns string value
	      hsh - compute modulo hash value from string with specified base
		 - accepts two parameters
		   - first is string for which we are computing the hash value
		   - second is numeric value for base of modulo hash
		 - returns numeric value
	      md5 - compute MD5 checksum for string
		 - accepts one string value
		 - returns string which represents MD5 checksum
	      lo - convert all characters inside string to lower case
		 - accepts one string value
		 - returns string value
	      up - convert all characters inside string to upper case
		 - accepts one string value
		 - returns string value
	      ue - encode unsafe characters in string with same encoding which
	      is used for encoding unsafe characters inside URL (%xx). By
	      default all non-ASCII values are encoded when this function is
	      used.
		 - accepts two string values
		   - first is string which we want to encode
		   - second is string which contains unsafe characters
		 - return string value
	      dc - delete unwanted characters from string (have similar	 func‐
	      tionality as -tr_del_chr option)
		 - accepts two string values
		   - first is string from which we want delete
		   -  second  is  string  which contains characters we want to
	      delete.
		 - returns string value
	      tc - replace character with other character in string (have sim‐
	      ilar functionality as -tr_chr_chr option)
		 - accepts three string values
		   -  first  is string inside which we want to replace charac‐
	      ters
		   - second is set of characters which we want to replace
		   - third is set of characters with which we are replacing
		 - returns string value
	      ts - replace some string inside string  with  any	 other	string
	      (have similar functionality as -tr_str_str option)
		 - accepts three string values
		   - first is string inside which we want to replace string
		   - second is the from string
		   - third is to string
		 - returns string value
	      spn  -  calculate	 initial  length of string which contains only
	      specified set of characters.  (have same functionality  as  str‐
	      spn() libc function)
		 - accepts two string values
		   - first is input string
		   - second is set of acceptable characters
		 - returns numeric value
	      cspn  - calculate initial length of string which doesn't contain
	      specified set of characters.  (have same functionality as	 strc‐
	      spn() libc function)
		 - accepts two string values
		   - first is input string
		   - second is set of unacceptable characters
		 - returns numeric value
	      sl - calculate length of string
		 - accepts one string value
		 - returns numeric value
	      ns - convert number to string by format
		 - accepts two parameters
		   -  first  parameter	is  format string same as for printf()
	      function
		   - second is number which we want to convert
		 - returns string value
	      lc - return position of last occurrence of  specified  character
	      inside string
		 - accepts two string parameters
		   - first string which we are searching in
		   - second string contains character for which we are looking
	      for
		 - returns numeric value
	      + - add two numeric values
		 - accepts two numeric values
		 - returns numeric value
	      - - subtract two numeric values
		 - accepts two numeric values
		 - returns numeric value
	      % - modulo addition
		 - accepts two numeric values
		 - returns numeric value
	      * - multiply two numeric values
		 - accepts two numeric values
		 - returns numeric value
	      / - divide two numeric values
		 - accepts two numeric values
		 - returns numeric value
	      rmpar - remove parameter from query string
		- accepts two string
		  - first string is string which we are adjusting
		  - second parameter is name  of  parameter  which  should  be
	      removed
		- returns adjusted string
	      getval - get query string parameter value
		- accepts two string
		  - first string is query string from which to get the parame‐
	      ter
		    value (usually %s)
		  - second string is name of parameter for which  we  want  to
	      get
		    the value
		-  returns  value  of  the  parameter or empty string when the
	      parameter
		  doesn't exist
	      sif - logical decision
		- accepts three parameters
		  - first is numeric and when it is zero then result of this
	      decision
		    is	result	of  second parameter, else result is result of
	      third
		    parameter
		  - second parameter is string
		  - third parameter is string
		- returns string result of decision
	      ! - logical not
		- accepts one numeric parameter
		- returns negation of parameter
	      & - logical and
		- accept two numeric parameters
		- returns logical and of parameters
	      | - logical or
		- accept two numeric parameters
		- returns logical or of parameters
	      getext - get file extension
		- accept one string (filename or path)
		- return string containing extension of parameter
	      seq - compare two strings
		- accepts two strings for comparison
		- returns numeric value 0 - if different 1 - if equal
	      jsf - execute JavaScript function
		- accepts one string parameter which holds name of
		  JavaScript function specified in script loaded with
		  -js_script_file option.
		- returns string value equal to return value of
		  JavaScript function
		- this function is available only when pavuk is compiled
		  with support for JavaScript bindings

	      For example, if you are mirroring very huge number  of  internet
	      sites  into same local directory, too much entries in one direc‐
	      tory, should cause performance problems. You may use for example
	      hsh  or  md5  functions to generate one additional level of hash
	      directories based on hostname with one of following options :

	      -fnrules F '*' '(sc (ns "%02d/" (hsh %h 100)) %o)'
	      -fnrules F '*' '(sc (ss (md5 %h) 0 2) %o)'

       -base_level $nr
	      Number of directory levels to omit in local tree.

	      For	  example	  when	       downloading	   URL
	      ftp://ftp.idata.sk/pub/unix/www/pavuk-0.7pl1.tgz	you  enter  at
	      command line  -base_level	 4  in	local  tree  will  be  created
	      www/pavuk-0.7pl1.tgz					   not
	      ftp/ftp.idata.sk_21/pub/unix/www/pavuk-0.7pl1.tgz as normally.

       -default_prefix $str
	      Default prefix of mirrored directory. This option is  used  only
	      when  you	 are trying to synchronize content of remote directory
	      which was downloaded using -base_level option. Also you must use
	      directory	 based	synchronization method, not URL based synchro‐
	      nization method. This is especially useful, when	used  in  con‐
	      junction with -remove_old option.

       -remove_adv/-noremove_adv
	      This  option is used for turn on/off of removing HTML tags which
	      contains advertisement banners.  The  banners  are  not  removed
	      from  HTML file, but are commented out.  Such URLs also will not
	      be downloaded.  This option have	effect	only  when  used  with
	      option  -adv_re.	 Default is turned off.	 This option is avail‐
	      able only when your system have support  for  one	 of  supported
	      regular expressions implementation.

       -adv_re $RE
	      This  option is used to specify regular expressions for matching
	      URLs  of	advertisement  banners.	   For	 example   :   -adv_re
	      http://ad.doubleclick.net/.*   is	 used  to match all files from
	      server ad.doubleclick.net.  This option is available  only  when
	      your  system  have any supported regular expressions implementa‐
	      tion.

       -unique_name/-nounique_name
	      Pavuk as default always attempts to assign to unique URL	unique
	      local  filename.	 If  this  behavior is not wanted, you can use
	      option -nounique_name to disable this.

Other Options
       -sleep $nr
	      This option allows you to specify number of seconds during  that
	      the  program  will be suspended between two transfers. Useful to
	      deny server overload.  Default value for this option is 0.

       -rsleep/-norsleep
	      When this option is active, pavuk randomizes the sleep time
	      between  transfers  in interval between zero and value specified
	      with -sleep option. Default is this option inactive.

       -ddays $nr
	      If document's modification time is more than $nr days old, then in
	      sync mode pavuk attempts to retrieve newer copy of document from
	      remote server. Default value is 0.

       -remove_old/-noremove_old
	      Remove improper documents (those which don't exist on remote
	      site).   This  option  have  effect  only when used in directory
	      based sync mode.	When used with URL based sync mode, pavuk will
	      not  remove any old files which were excluded from document tree
	      and are not referenced in any HTML document.  You must also  use
	      option -subdir, to let pavuk find files which belong to current
	      mirror.  As default pavuk won't remove any old files.

       -browser $str
	      is used to set your browser command (in URL tree dialog you  can
	      use  right click to raise menu, from which you can start browser
	      on actually selected URL).  This option is available  only  when
	      compiled with GTK GUI and with support for URL tree preview.

       -debug/-nodebug
	      turns  on displaying of debug messages. This option is available
	      only when compiled with -DDEBUG.	If -debug option is used pavuk
	      will  output verbose information about documents, whole protocol
	      level information, locking informations  and  more  (depends  on
	      -debug_level setup).  This option is used just as a trigger to
	      enable output of debug messages selected by -debug_level option.
	      Default is debug mode turned off.

       -debug_level $level
	      Set  level of required debug informations. $level can be numeric
	      value which represent binary mask for requested debug levels, or
	      comma separated list of supported debug levels.  Currently pavuk
	      supports following debug levels :
	      html - for HTML parser debugging
	      protos - to see server side protocol messages
	      protoc - to see client side protocol messages
	      procs - to see some special procedure calls
	      locks - for debugging of documents locking
	      net - for debugging some low level network stuff
	      misc - for miscellaneous unsorted debug messages
	      user - for verbose user level messages
	      all - request all currently supported debug levels
	      mtlock - locking of resources in multithreading environment
	      mtthr - launching/waking/sleeping/stopping of threads in multi‐
	      threaded environment
	      protod - for DEBUGGING of POST requests
	      limits - for debugging limiting options, you will see the reason
	      why particular URLs are  rejected	 by  pavuk  and	 which	option
	      caused this.
	      ssl - to enable verbose reporting about SSL related things.

       -remind_cmd $str
	      This  option  have  effect  only	when running pavuk in reminder
	      mode. To command specified with this option pavuk	 sends	result
	      of  running  reminder  mode.   There  are	 listed URLs which are
	      changed and URLs which have any errors.  Default remind  command
	      is "mailx user@server -s \"pavuk reminder result\"" .

       -nscache_dir $dir
	      Path  to	Netscape  browser cache directory. If you specify this
	      path, pavuk attempts to find out if you have URL in this	cache.
	      If  URL  is there it will be fetched else pavuk will download it
	      from net. The cache directory index file must be named  index.db
	      and  must	 be  located  in the cache directory.  To support this
	      feature, pavuk have to be linked with BerkeleyDB 1.8x .

       -mozcache_dir $dir
	      Path to Mozilla browser cache directory. Same  functionality  as
	      with  previous option, just for different browser with different
	      cache formats.  Pavuk supports both formats of  Mozilla  browser
	      disk cache (old for versions <0.9 and new used in >=0.9).  The
	      old format cache directory must contain cache directory index
	      database with name cache.db.  The new format cache directory
	      must  contain  map  file	_CACHE_MAP_,  and  three  block	 files
	      _CACHE_001_,  _CACHE_002_,  _CACHE_003_.	To support old Mozilla
	      cache format, pavuk have to be linked with BerkeleyDB 1.8x.  New
	      Mozilla cache format doesn't require any external library.

       -post_cmd $str
	      Post-processing command, which will be executed after successful
	      download of document.  This command may somehow handle with doc‐
	      ument.  During time of running this command, pavuk leaves actual
	      document locked, so there isn't chance  that  some  other	 pavuk
	      process  will modify document.  This postprocessing command will
	      get three additional parameters from pavuk.
		 - local name of document
		 - 1/0 1 if document is HTML document, 0 if not
		 - original URL of this document

       -hack_add_index/-nohack_add_index
	      This is bit hacky option. It forces pavuk to add	to  URL	 queue
	      also directory indexes of all queued documents. This allow pavuk
	      to download more documents from site, than it is able achieve in
	      normal  traversing  of  HTML documents.  Bit dirty but useful in
	      some cases.

       -js_script_file $file
	      Pavuk have optionally builtin JavaScript	interpreter  to	 allow
	      high  level customization of some internal procedures. Currently
	      you are allowed to customize with your own JavaScript  functions
	      two  things.  You can use it to set precise limiting options, or
	      you can write own functions which can be used  inside  rules  of
	      -fnrules	option.	  With	this  option  you  can load JavaScript
	      script with functions into pavuk's internal JavaScript inter‐
	      preter. To learn more about these capabilities read separate doc‐
	      ument jsbind.txt which comes  with  pavuk	 sources  in  toplevel
	      directory.  This option is available only when you have compiled
	      pavuk with support for JavaScript bindings.

EXIT STATUS
       As of version 0.9pl29 pavuk has changed indication of status by exit
       codes.  In earlier versions exit status 0 was for no error and nonzero
       exit status was something like count of failed documents.  In all ver‐
       sions after 0.9pl29 there are defined following exit codes:

	   0 - no error, everything is OK
	   1 - error in configuration of pavuk options or
	       error in config files
	   2 - some error occurred while downloading documents

ENVIRONMENTAL VARIABLES
       USER   variable	is used to construct email address from user and host‐
	      name

       LC_* or LANG
	      used to set internationalized environment

       PAVUKRC_FILE
	      with this variable you can specify alternative location for your
	      pavukrc configuration file.

REQUIRED EXTERNAL PROGRAMS
       at     is used for scheduling.

       gunzip is used to decode gzip or compress encoded documents.

Bugs
       If you find any, please let me know.

FILES
       @SYSCONFDIR@/pavukrc

       ~/.pavukrc

       ~/.pavuk_prefs

	      These  files  are	 used as default configuration files.  You may
	      specify there some constant values like  your  proxy  server  or
	      your  preferred  WWW browser. Configuration options reflect com‐
	      mand line options.  Not all parameters are suitable for  use  in
	      default  configuration  file.   You  should  select only some of
	      them, which you really need.

	      File ~/.pavuk_prefs is special file which contains automatically
	      stored  configuration.   This file is used only when running GUI
	      interface of pavuk and option -prefs is active.

	      First (if present)  parsed  file	is  @SYSCONFDIR@/pavukrc  then
	      ~/.pavukrc (if present), then ~/.pavuk_prefs (if present).  Last
	      the command line is parsed. The precedence is as follows :

	      - highest -
	      Entered in user interface
	      Entered in command line
	      ~/.pavuk_prefs
	      ~/.pavukrc
	      @SYSCONFDIR@/pavukrc
	      - lowest -

	      Here is table of config file - command line options pairs.

	      MaxLevel:			 --->  -lmax
	      MaxDocs:			 --->  -dmax
	      MaxSize:			 --->  -maxsize
	      MinSize:			 --->  -minsize
	      SleepBetween:		 --->  -sleep
	      MaxRetry:			 --->  -retry
	      MaxRegets:		 --->  -nregets
	      MaxRedirections:		 --->  -nredirs
	      CommTimeout:		 --->  -timeout
	      RegetRollbackAmount:	 --->  -rollback
	      DocExpiration:		 --->  -ddays
	      UseCache:			 --->  -nocache
	      UseRobots:		 --->  -noRobots
	      AllowFTP:			 --->  -noFTP
	      AllowHTTP:		 --->  -noHTTP
	      AllowSSL:			 --->  -noSSL
	      AllowGopher:		 --->  -noGopher
	      AllowCGI:			 --->  -noCGI
	      AllowGZEncoding:		 --->  -noEnc
	      AllowFTPRecursion:	 --->  -FTPdir
	      ForceReget:		 --->  -force_reget
	      Debug:			 --->  -debug
	      AllowedSites:		 --->  -asite
	      DisallowedSites:		 --->  -dsite
	      AllowedDomains:		 --->  -adomain
	      DisallowedDomains:	 --->  -ddomain
	      AllowedPrefixes:		 --->  -aprefix
	      DisallowedPrefixes:	 --->  -dprefix
	      AllowedSuffixes:		 --->  -asfx
	      DisallowedSuffixes:	 --->  -dsfx
	      AllowedMIMETypes:		 --->  -amimet
	      DisallowedMIMETypes:	 --->  -dmimet
	      PreferredLanguages:	 --->  -alang
	      PreferredCharset:		 --->  -acharset
	      WorkingDir:		 --->  -cdir
	      WorkingSubDir:		 --->  -subdir
	      HTTPAuthorizationScheme:	 --->  -auth_scheme
	      HTTPAuthorizationName:	 --->  -auth_name
	      HTTPAuthorizationPassword: --->  -auth_passwd
	      AuthReuseDigestNonce:	 --->  -auth_reuse_nonce
	      SSLCertPassword:		 --->  -ssl_cert_passwd
	      SSLCertFile:		 --->  -ssl_cert_file
	      SSLKeyFile:		 --->  -ssl_key_file
	      EmailAddress:		 --->  -from
	      MatchPattern:		 --->  -pattern
	      REMatchPattern:		 --->  -rpattern
	      SkipMatchPattern:		 --->  -skip_pattern
	      SkipREMatchPattern:	 --->  -skip_rpattern
	      URLMatchPattern:		 --->  -url_pattern
	      URLREMatchPattern:	 --->  -url_rpattern
	      SkipURLMatchPattern:	 --->  -skip_url_pattern
	      SkipURLREMatchPattern:	 --->  -skip_url_rpattern
	      DefaultMode:		 --->  -mode
	      FTPProxy:			 --->  -ftp_proxy
	      HTTPProxy:		 --->  -http_proxy
	      SSLProxy:			 --->  -ssl_proxy
	      GopherProxy:		 --->  -gopher_proxy
	      FTPViaHTTPProxy:		 --->  -ftp_httpgw
	      GopherViaHTTPProxy:	 --->  -gopher_httpgw
	      HTTPProxyUser:		 --->  -http_proxy_user
	      HTTPProxyPass:		 --->  -http_proxy_pass
	      HTTPProxyAuth:		 --->  -http_proxy_auth
	      AuthReuseProxyDigestNonce: --->  -auth_reuse_proxy_nonce
	      Browser:			 --->  -browser
	      ScenarioDir:		 --->  -scndir
	      ShowProgress:		 --->  -progress
	      XMaxLogSize:		 --->  -xmaxlog
	      LogFile:			 --->  -logfile
	      RemoveOldDocuments:	 --->  -remove_old
	      AuthFile:			 --->  -auth_file
	      BaseLevel:		 --->  -base_level
	      FTPDirtyProxy:		 --->  -ftp_dirtyproxy
	      ActiveFTPData:		 --->  -ftp_active/-ftp_passive
	      ActiveFTPPortRange:	 --->  -active_ftp_port_range
	      AlwaysMDTM:		 --->  -always_mdtm/-noalways_mdtm
	      RemoveBeforeStore:	 --->  -(no)remove_before_store
	      ShowDownloadTime:		 --->  -stime
	      NLSMessageCatalogDir:	 --->  -msgcat
	      Quiet:			 --->  -quiet/-verbose
	      NewerThan:		 --->  -newer_than
	      OlderThan:		 --->  -older_than
	      Reschedule:		 --->  -reschedule
	      DontLeaveSite:		 --->  -dont_leave_site/-leave_site
	      DontLeaveDir:		 --->  -dont_leave_dir/-leave_dir
	      PreserveTime:		 --->  -preserve_time/-nopreserve_time
	      LeaveLevel:		 --->  -leave_level
	      GUIFont:			 --->  -gui_font
	      UserCondition:		 --->  -user_condition
	      CookieFile:		 --->  -cookie_file
	      CookieSend:		 --->  -cookie_send/-nocookie_send
	      CookieRecv:		 --->  -cookie_recv/-nocookie_recv
	      CookieUpdate:		 --->  -cookie_update/-nocookie_update
	      CookiesMax:		 --->  -cookies_max
	      CookieCheckDomain:	 --->  -cookie_check/-nocookie_check
	      DisabledCookieDomains:	 --->  -disabled_cookie_domains
	      DisableHTMLTag:		 --->  -disable_html_tag
	      EnableHTMLTag:		 --->  -enable_html_tag
	      TrDeleteChar:		 --->  -tr_del_chr
	      TrStrToStr:		 --->  -tr_str_str
	      TrChrToChr:		 --->  -tr_chr_chr
	      IndexName:		 --->  -index_name
	      StoreName:		 --->  -store_name
	      PreservePermisions:	 --->  -preserve_perm/-nopreserve_perm
	      PreserveAbsoluteSymlinks:	 --->  -preserve_slinks/-nopreserve_slinks
	      FTPListCMD:		 --->  -FTPlist/-noFTPlist
	      MaxRate:			 --->  -maxrate
	      MinRate:			 --->  -minrate
	      ReadBufferSize:		 --->  -bufsize
	      BgMode:			 --->  -bg/-nobg
	      CheckSize:		 --->  -check_size/-nocheck_size
	      SLogFile:			 --->  -slogfile
	      Identity:			 --->  -identity
	      SendFromHeader:		 --->  -send_from/-nosend_from
	      RunX:			 --->  -runX
	      FnameRules:		 --->  -fnrules
	      StoreDocInfoFiles:	 --->  -store_info/-nostore_info
	      AllLinksToLocal:		 --->  -all_to_local/-noall_to_local
	      AllLinksToRemote:		 --->  -all_to_remote/-noall_to_remote
	      SelectedLinksToLocal:	 --->  -sel_to_local/-nosel_to_local
	      ReminderCMD:		 --->  -remind_cmd
	      AutoReferer:		 --->  -auto_referer/-noauto_referer
	      URLsFile:			 --->  -urls_file
	      UsePreferences:		 --->  -prefs/-noprefs
	      FTPhtml:			 --->  -FTPhtml/-noFTPhtml
	      StoreDirIndexFile:	 --->  -store_index/-nostore_index
	      Language:			 --->  -language
	      FileSizeQuota:		 --->  -file_quota
	      TransferQuota:		 --->  -trans_quota
	      FSQuota:			 --->  -fs_quota
	      EnableJS:			 --->  -enable_js/-disable_js
	      UrlSchedulingStrategy:	 --->  -url_strategy
	      NetscapeCacheDir:		 --->  -nscache_dir
	      RemoveAdvertisement:	 --->  -remove_adv/-noremove_adv
	      AdvBannerRE:		 --->  -adv_re
	      CheckIfRunnigAtBackground: --->  -check_bg/-nocheck_bg
	      SendIfRange:		 --->  -send_if_range/-nosend_if_range
	      SchedulingCommand:	 --->  -sched_cmd
	      UniqueLogName:		 --->  -unique_log/-nounique_log
	      PostCommand:		 --->  -post_cmd
	      SSLVersion:		 --->  -ssl_version
	      UniqueSSLID:		 --->  -unique_sslid/-nounique_sslid
	      AddHTTPHeader:		 --->  -httpad
	      StatisticsFile:		 --->  -statfile
	      WaitOnExit:		 --->  -ewait
	      AllowedIPAdrressPattern:	 --->  -aip_pattern
	      DisallowedIPAdrressPattern:--->  -dip_pattern
	      SiteLevel:		 --->  -site_level
	      UseHTTP11:		 --->  -use_http11
	      MaxRunTime:		 --->  -max_time
	      LocalIP:			 --->  -local_ip
	      RequestInfo:		 --->  -request
	      HashSize:			 --->  -hash_size
	      NumberOfThreads:		 --->  -nthreads
	      ImmediateMessages:	 --->  -immesg/-noimmsg
	      HTMLFormData:		 --->  -formdata
	      DumpFD:			 --->  -dumpfd
	      DumpUrlFD:		 --->  -dump_urlfd
	      DeleteAfterTransfer:	 --->  -del_after/-nodel_after
	      UniqueDocName:		 --->  -unique_name/-nounique_name
	      LeaveSiteEnterDirectory:	 --->  -leave_site_enter_dir/-dont_leave_site_enter_dir
	      SinglePage:		 --->  -singlepage/-nosinglepage
	      NTLMAuthorizationDomain:	 --->  -auth_ntlm_domain
	      NTLMProxyAuthorizationDomain:
					 --->  -auth_proxy_ntlm_domain
	      JavascriptPattern:	 --->  -js_pattern
	      FollowCommand:		 --->  -follow_cmd
	      RetrieveSymlinks:		 --->  -retrieve_symlink/-noretrieve_symlink
	      JSTransform:		 --->  -js_transform
	      JSTransform2:		 --->  -js_transform2
	      FTPProxyUser:		 --->  -ftp_proxy_user
	      FTPProxyPassword:		 --->  -ftp_proxy_pass
	      LimitInlineObjects:	 --->  -limit_inlines/-dont_limit_inlines
	      FTPListOptions:		 --->  -ftp_list_options
	      FixWuFTPDBrokenLISTcmd:	 --->  -fix_wuftpd_list/-nofix_wuftpd_list
	      PostUpdate:		 --->  -post_update/-nopost_update
	      SeparateInfoDir:		 --->  -info_dir
	      MozillaCacheDir:		 --->  -mozcache_dir
	      AllowedPorts:		 --->  -aport
	      DisallowedPorts:		 --->  -dport
	      HackAddIndex:		 --->  -hack_add_index/-nohack_add_index
	      JavaScriptFile:		 --->  -js_script_file
	      FtpLoginHandshake:	 --->  -ftp_login_handshake
	      NSSCertDir:		 --->  -nss_cert_dir
	      NSSAcceptUnknownCert:	 --->  -nss_accept_unknown_cert/-nonss_accept_unknown_cert
	      NSSDomesticPolicy:	 --->  -nss_domestic_policy/-nss_export_policy
	      DontTouchUrlREPattern:	 --->  -dont_touch_url_rpattern
	      DontTouchUrlPattern:	 --->  -dont_touch_url_pattern
	      DontTouchTagREPattern:	 --->  -dont_touch_tag_rpattern
	      HTMLTagPattern:		 --->  -tag_pattern
	      HTMLTagREPattern:		 --->  -tag_rpattern
	      URL:			 --->  one URL (more lines with URL:
					       ... means more URL's)

       A line which begins with '#' is a comment.
       TrStrToStr: and TrChrToChr: must contain two quoted strings.  All
       parameter names are case insensitive.  If any option is missing here,
       try looking inside the config.c source file.

       See the pavukrc.sample file for an example.

       .pavuk_authinfo

	      The file should contain as many authentication records as you
	      need.  Records are separated by any number of empty lines.
	      Parameter names are case insensitive.

	      Structure of record:

	      Proto: <proto ID>	   ---> identification of protocol
					(ftp/http/https/..)
				   - required field
	      Host: <host:[port]>  ---> host name
				   - required field
	      User: <user>	   ---> name of user
				   - optional
	      Pass: <password>	   ---> password for user
				   - optional
	      Base: <path>	   ---> base prefix of document path
				   - optional
	      Realm: <name>	   ---> realm for HTTP authorization
				   - optional
	      NTLMDomain: <domain> ---> NT/LM domain for NTLM authorization
				   - optional
	      Type: <type>	   ---> HTTP authentication scheme
					     - 1/user	- user auth scheme
					     - 2/Basic	- Basic auth scheme (default)
					     - 3/Digest - Digest auth scheme
					     - 4/NTLM	- NTLM auth scheme
				   - optional

       See the pavuk_authinfo.sample file for an example.

       ~/.pavuk_keys
	      This is the file where information about configurable menu
	      option shortcuts is stored.  It is available only when pavuk is
	      compiled with Gtk+ 1.2 or higher.

       ~/.pavuk_remind_db
	      This file contains information about URLs for running in
	      reminder mode.  The structure of this file is very simple.  Each
	      line contains information about one URL.  The first entry in a
	      line is the last known modification time of the URL (stored in
	      time_t format - number of seconds since 1.1.1970 GMT).  The
	      second entry is the URL.

EXAMPLE COMMAND LINE
       pavuk -mode mirror -nobg -store_info -info_dir
       /mirror/info -nthreads 1 -cdir /mirror/incoming -subdir
       /mirror/incoming -preserve_time -nopreserve_perm
       -nopreserve_slinks -noretrieve_symlink -force_reget
       -noRobots -trans_quota 16384 -maxsize 16777216
       -max_time 28 -nodel_after -remove_before_store -ftpdir
       -ftplist -ftp_list_options -a -dont_leave_site
       -dont_leave_dir -all_to_local -remove_old -nostore_index
       -active_ftp_port_range 57344:65535 -always_mdtm
       -ftp_passive -base_level 2 http://<my_host>/doc/

SEE ALSO
       Look into the ChangeLog file for more information about new features
       in particular versions of pavuk.

AUTHOR
       Main development Ondrejicka Stefan
       Look into CREDITS file of sources for additional information.

AVAILABILITY
       pavuk is available from http://pavuk.sourceforge.net/

0.9.35			       2016-02-18T16:47			      pavuk(1)
[top]

List of man pages available for DragonFly

Copyright (c) for man pages and the logo by the respective OS vendor.

For those who want to learn more, the polarhome community provides shell access and support.

[legal] [privacy] [GNU] [policy] [cookies] [netiquette] [sponsors] [FAQ]
Tweet
Polarhome, production since 1999.
Member of Polarhome portal.
Based on Fawad Halim's script.
....................................................................
Vote for polarhome
Free Shell Accounts :: the biggest list on the net