merge from the debian-sid branch
[ntk/apt.git] / apt-pkg / acquire.h
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: acquire.h,v 1.29.2.1 2003/12/24 23:09:17 mdz Exp $
4 /* ######################################################################
5
6 Acquire - File Acquisition
7
8 This module contains the Acquire system. It is responsible for bringing
9 files into the local pathname space. It deals with URIs for files and
10 URI handlers responsible for downloading or finding the URIs.
11
12 Each file to download is represented by an Acquire::Item class subclassed
13 into a specialization. The Item class can add itself to several URI
14 acquire queues each prioritized by the download scheduler. When the
15 system is run the proper URI handlers are spawned and the acquire
16 queues are fed into the handlers by the scheduler until the queues are
17 empty. This allows for an Item to be downloaded from an alternate source
18 if the first try turns out to fail. It also allows concurrent downloading
19 of multiple items from multiple sources as well as dynamic balancing
20 of load between the sources.
21
22 Scheduling of downloads is done on a first ask first get basis. This
23 preserves the order of the download as much as possible. And means the
24 fastest source will tend to process the largest number of files.
25
26 Internal methods and queues for performing gzip decompression,
27 md5sum hashing and file copying are provided to allow items to apply
28 a number of transformations to the data files they are working with.
29
30 ##################################################################### */
31 /*}}}*/
32
33 /** \defgroup acquire Acquire system {{{
34 *
35 * \brief The Acquire system is responsible for retrieving files from
36 * local or remote URIs and postprocessing them (for instance,
37 * verifying their authenticity). The core class in this system is
38 * pkgAcquire, which is responsible for managing the download queues
39 * during the download. There is at least one download queue for
40 * each supported protocol; protocols such as http may provide one
41 * queue per host.
42 *
43 * Each file to download is represented by a subclass of
44 * pkgAcquire::Item. The files add themselves to the download
45 * queue(s) by providing their URI information to
46 * pkgAcquire::Item::QueueURI, which calls pkgAcquire::Enqueue.
47 *
48 * Once the system is set up, the Run method will spawn subprocesses
49 * to handle the enqueued URIs; the scheduler will then take items
50 * from the queues and feed them into the handlers until the queues
51 * are empty.
52 *
53 * \todo Acquire supports inserting an object into several queues at
54 * once, but it is not clear what its behavior in this case is, and
55 * no subclass of pkgAcquire::Item seems to actually use this
56 * capability.
57 */ /*}}}*/
58
59 /** \addtogroup acquire
60 *
61 * @{
62 *
63 * \file acquire.h
64 */
65
66 #ifndef PKGLIB_ACQUIRE_H
67 #define PKGLIB_ACQUIRE_H
68
69 #include <apt-pkg/macros.h>
70
71 #include <vector>
72 #include <string>
73
74 using std::vector;
75 using std::string;
76
77
78 #include <sys/time.h>
79 #include <unistd.h>
80
81 class pkgAcquireStatus;
82
83 /** \brief The core download scheduler. {{{
84 *
85 * This class represents an ongoing download. It manages the lists
86 * of active and pending downloads and handles setting up and tearing
87 * down download-related structures.
88 *
89 * \todo Why all the protected data items and methods?
90 */
91 class pkgAcquire
92 {
93 public:
94
95 class Item;
96 class Queue;
97 class Worker;
98 struct MethodConfig;
99 struct ItemDesc;
100 friend class Item;
101 friend class Queue;
102
103 typedef vector<Item *>::iterator ItemIterator;
104 typedef vector<Item *>::const_iterator ItemCIterator;
105
106 protected:
107
108 /** \brief A list of items to download.
109 *
110 * This is built monotonically as items are created and only
111 * emptied when the download shuts down.
112 */
113 vector<Item *> Items;
114
115 /** \brief The head of the list of active queues.
116 *
117 * \todo why a hand-managed list of queues instead of std::list or
118 * std::set?
119 */
120 Queue *Queues;
121
122 /** \brief The head of the list of active workers.
123 *
124 * \todo why a hand-managed list of workers instead of std::list
125 * or std::set?
126 */
127 Worker *Workers;
128
129 /** \brief The head of the list of acquire method configurations.
130 *
131 * Each protocol (http, ftp, gzip, etc) via which files can be
132 * fetched can have a representation in this list. The
133 * configuration data is filled in by parsing the 100 Capabilities
134 * string output by a method on startup (see
135 * pkgAcqMethod::pkgAcqMethod and pkgAcquire::GetConfig).
136 *
137 * \todo why a hand-managed config dictionary instead of std::map?
138 */
139 MethodConfig *Configs;
140
141 /** \brief The progress indicator for this download. */
142 pkgAcquireStatus *Log;
143
144 /** \brief The total size of the files which are to be fetched.
145 *
146 * This is not necessarily the total number of bytes to download
147 * when, e.g., download resumption and list updates via patches
148 * are taken into account.
149 */
150 unsigned long ToFetch;
151
152 // Configurable parameters for the scheduler
153
154 /** \brief Represents the queuing strategy for remote URIs. */
155 enum QueueStrategy {
156 /** \brief Generate one queue for each protocol/host combination; downloads from
157 * multiple hosts can proceed in parallel.
158 */
159 QueueHost,
160 /** \brief Generate a single queue for each protocol; serialize
161 * downloads from multiple hosts.
162 */
163 QueueAccess} QueueMode;
164
165 /** \brief If \b true, debugging information will be dumped to std::clog. */
166 bool const Debug;
167 /** \brief If \b true, a download is currently in progress. */
168 bool Running;
169
170 /** \brief Add the given item to the list of items. */
171 void Add(Item *Item);
172
173 /** \brief Remove the given item from the list of items. */
174 void Remove(Item *Item);
175
176 /** \brief Add the given worker to the list of workers. */
177 void Add(Worker *Work);
178
179 /** \brief Remove the given worker from the list of workers. */
180 void Remove(Worker *Work);
181
182 /** \brief Insert the given fetch request into the appropriate queue.
183 *
184 * \param Item The URI to download and the item to download it
185 * for. Copied by value into the queue; no reference to Item is
186 * retained.
187 */
188 void Enqueue(ItemDesc &Item);
189
190 /** \brief Remove all fetch requests for this item from all queues. */
191 void Dequeue(Item *Item);
192
193 /** \brief Determine the fetch method and queue of a URI.
194 *
195 * \param URI The URI to fetch.
196 *
197 * \param[out] Config A location in which to place the method via
198 * which the URI is to be fetched.
199 *
200 * \return the string-name of the queue in which a fetch request
201 * for the given URI should be placed.
202 */
203 string QueueName(string URI,MethodConfig const *&Config);
204
205 /** \brief Build up the set of file descriptors upon which select() should
206 * block.
207 *
208 * The default implementation inserts the file descriptors
209 * corresponding to active downloads.
210 *
211 * \param[out] Fd The largest file descriptor in the generated sets.
212 *
213 * \param[out] RSet The set of file descriptors that should be
214 * watched for input.
215 *
216 * \param[out] WSet The set of file descriptors that should be
217 * watched for output.
218 */
219 virtual void SetFds(int &Fd,fd_set *RSet,fd_set *WSet);
220
221 /** \brief Handle input from and output to file descriptors which select()
222 * has determined are ready. The default implementation
223 * dispatches to all active downloads.
224 *
225 * \param RSet The set of file descriptors that are ready for
226 * input.
227 *
228 * \param WSet The set of file descriptors that are ready for
229 * output.
230 */
231 virtual void RunFds(fd_set *RSet,fd_set *WSet);
232
233 /** \brief Check for idle queues with ready-to-fetch items.
234 *
235 * Called by pkgAcquire::Queue::Done (NOTE(review): Queue declares ItemDone but no Done; presumably ItemDone is meant - confirm) each time an item is dequeued
236 * but remains on some queues; i.e., another queue should start
237 * fetching it.
238 */
239 void Bump();
240
241 public:
242
243 /** \brief Retrieve information about a fetch method by name.
244 *
245 * \param Access The name of the method to look up.
246 *
247 * \return the method whose name is Access, or \b NULL if no such method exists.
248 */
249 MethodConfig *GetConfig(string Access);
250
251 /** \brief Provides information on how a download terminated. */
252 enum RunResult {
253 /** \brief All files were fetched successfully. */
254 Continue,
255
256 /** \brief Some files failed to download. */
257 Failed,
258
259 /** \brief The download was cancelled by the user (i.e., #Log's
260 * pkgAcquireStatus::Pulse() method returned \b false).
261 */
262 Cancelled};
263
264 /** \brief Download all the items that have been Add()ed to this
265 * download process.
266 *
267 * This method will block until the download completes, invoking
268 * methods on #Log to report on the progress of the download.
269 *
270 * \param PulseInterval The method pkgAcquireStatus::Pulse will be
271 * invoked on #Log at intervals of PulseInterval milliseconds (NOTE(review): the default of 500000 suggests the unit is really microseconds - confirm against the implementation).
272 *
273 * \return the result of the download.
274 */
275 RunResult Run(int PulseInterval=500000);
276
277 /** \brief Remove all items from this download process, terminate
278 * all download workers, and empty all queues.
279 */
280 void Shutdown();
281
282 /** \brief Get the first #Worker object.
283 *
284 * \return the first active worker in this download process.
285 */
286 inline Worker *WorkersBegin() {return Workers;};
287
288 /** \brief Advance to the next #Worker object.
289 *
290 * \return the worker immediately following I, or \b NULL if none
291 * exists.
292 */
293 Worker *WorkerStep(Worker *I);
294
295 /** \brief Get the head of the list of items. */
296 inline ItemIterator ItemsBegin() {return Items.begin();};
297
298 /** \brief Get the end iterator of the list of items. */
299 inline ItemIterator ItemsEnd() {return Items.end();};
300
301 // Iterate over queued Item URIs
302 class UriIterator;
303 /** \brief Get the head of the list of enqueued item URIs.
304 *
305 * This iterator will step over every element of every active
306 * queue.
307 */
308 UriIterator UriBegin();
309 /** \brief Get the end iterator of the list of enqueued item URIs. */
310 UriIterator UriEnd();
311
312 /** \brief Deletes each entry in the given directory that is not being
313 * downloaded by this object. For instance, when downloading new
314 * list files, calling Clean() will delete the old ones.
315 *
316 * \param Dir The directory to be cleaned out.
317 *
318 * \return \b true if the directory exists and is readable.
319 */
320 bool Clean(string Dir);
321
322 /** \return the total size in bytes of all the items included in
323 * this download.
324 */
325 double TotalNeeded();
326
327 /** \return the size in bytes of all non-local items included in
328 * this download.
329 */
330 double FetchNeeded();
331
332 /** \return the amount of data to be fetched that is already
333 * present on the filesystem.
334 */
335 double PartialPresent();
336
337 /** \brief Delayed constructor
338 *
339 * \param Progress The progress indicator associated with this download or
340 * \b NULL for none. This object is not owned by the
341 * download process and will not be deleted when the pkgAcquire
342 * object is destroyed. Naturally, it should live for at least as
343 * long as the pkgAcquire object does.
344 * \param Lock defines a lock file that should be acquired to ensure
345 * only one Acquire class is in action at the time or an empty string
346 * if no lock file should be used.
347 */
348 bool Setup(pkgAcquireStatus *Progress = NULL, string const &Lock = "");
349
350 /** \brief Construct a new pkgAcquire; the overload taking a pkgAcquireStatus is marked __deprecated. */
351 pkgAcquire(pkgAcquireStatus *Log) __deprecated;
352 pkgAcquire();
353
354 /** \brief Destroy this pkgAcquire object.
355 *
356 * Destroys all queue, method, and item objects associated with
357 * this download.
358 */
359 virtual ~pkgAcquire();
360
361 private:
362 /** \brief FD of the Lock file we acquire in Setup (if any) */
363 int LockFD;
364
365 /** \brief Ensure the existence of the given Path
366 *
367 * \param Parent directory of the Path directory - a trailing
368 * /apt/ will be removed before CreateDirectory call.
369 * \param Path which should exist after (successful) call
370 */
371 bool CheckDirectory(string const &Parent, string const &Path) const;
372 };
373
374 /** \brief Represents a single download source from which an item
375 * should be downloaded.
376 *
377 * An item may have several associated ItemDescs over its lifetime.
378 */
379 struct pkgAcquire::ItemDesc
380 {
381 /** \brief The URI from which to download this item. */
382 string URI;
383 /** \brief A description of this item. */
384 string Description;
385 /** \brief A shorter description of this item. */
386 string ShortDesc;
387 /** \brief The underlying item which is to be downloaded. */
388 Item *Owner;
389 };
390 /*}}}*/
391 /** \brief A single download queue in a pkgAcquire object. {{{
392 *
393 * \todo Why so many protected values?
394 */
395 class pkgAcquire::Queue
396 {
397 friend class pkgAcquire;
398 friend class pkgAcquire::UriIterator;
399 friend class pkgAcquire::Worker;
400
401 /** \brief The next queue in the pkgAcquire object's list of queues. */
402 Queue *Next;
403
404 protected:
405
406 /** \brief A single item placed in this queue. */
407 struct QItem : pkgAcquire::ItemDesc
408 {
409 /** \brief The next item in the queue. */
410 QItem *Next;
411 /** \brief The worker associated with this item, if any. */
412 pkgAcquire::Worker *Worker;
413
414 /** \brief Assign the ItemDesc portion of this QItem from
415 * another ItemDesc; Next and Worker are left untouched.
416 */
417 void operator =(pkgAcquire::ItemDesc const &I)
418 {
419 URI = I.URI;
420 Description = I.Description;
421 ShortDesc = I.ShortDesc;
422 Owner = I.Owner;
423 };
424 };
425
426 /** \brief The name of this queue. */
427 string Name;
428
429 /** \brief The head of the list of items contained in this queue.
430 *
431 * \todo why a by-hand list instead of an STL structure?
432 */
433 QItem *Items;
434
435 /** \brief The head of the list of workers associated with this queue.
436 *
437 * \todo This is plural because support exists in Queue for
438 * multiple workers. However, it does not appear that there is
439 * any way to actually associate more than one worker with a
440 * queue.
441 *
442 * \todo Why not just use a std::set?
443 */
444 pkgAcquire::Worker *Workers;
445
446 /** \brief the download scheduler with which this queue is associated. */
447 pkgAcquire *Owner;
448
449 /** \brief The number of entries in this queue that are currently
450 * being downloaded.
451 */
452 signed long PipeDepth;
453
454 /** \brief The maximum number of entries that this queue will
455 * attempt to download at once.
456 */
457 unsigned long MaxPipeDepth;
458
459 public:
460
461 /** \brief Insert the given fetch request into this queue.
462 *
463 * \return \b true if the queuing was successful. May return
464 * \b false if the Item is already in the queue
465 */
466 bool Enqueue(ItemDesc &Item);
467
468 /** \brief Remove all fetch requests for the given item from this queue.
469 *
470 * \return \b true if at least one request was removed from the queue.
471 */
472 bool Dequeue(Item *Owner);
473
474 /** \brief Locate an item in this queue.
475 *
476 * \param URI A URI to match against.
477 * \param Owner A pkgAcquire::Worker to match against.
478 *
479 * \return the first item in the queue whose URI is \p URI and that
480 * is being downloaded by \p Owner.
481 */
482 QItem *FindItem(string URI,pkgAcquire::Worker *Owner);
483
484 /** \brief Presumably this should start downloading an item?
485 *
486 * \todo Unimplemented. Implement it or remove?
487 */
488 bool ItemStart(QItem *Itm,unsigned long Size);
489
490 /** \brief Remove the given item from this queue and set its state
491 * to pkgAcquire::Item::StatDone.
492 *
493 * If this is the only queue containing the item, the item is also
494 * removed from the main queue by calling pkgAcquire::Dequeue.
495 *
496 * \param Itm The item to remove.
497 *
498 * \return \b true if no errors are encountered.
499 */
500 bool ItemDone(QItem *Itm);
501
502 /** \brief Start the worker process associated with this queue.
503 *
504 * If a worker process is already associated with this queue,
505 * this is equivalent to calling Cycle().
506 *
507 * \return \b true if the startup was successful.
508 */
509 bool Startup();
510
511 /** \brief Shut down the worker process associated with this queue.
512 *
513 * \param Final If \b true, then the process is stopped unconditionally.
514 * Otherwise, it is only stopped if it does not need cleanup
515 * as indicated by the pkgAcqMethod::NeedsCleanup member of
516 * its configuration.
517 *
518 * \return \b true.
519 */
520 bool Shutdown(bool Final);
521
522 /** \brief Send idle items to the worker process.
523 *
524 * Fills up the pipeline by inserting idle items into the worker's queue.
525 */
526 bool Cycle();
527
528 /** \brief Check for items that could be enqueued.
529 *
530 * Call this after an item placed in multiple queues has gone from
531 * the pkgAcquire::Item::StatFetching state to the
532 * pkgAcquire::Item::StatIdle state, to possibly refill an empty queue.
533 * This is an alias for Cycle().
534 *
535 * \todo Why both this and Cycle()? Are they expected to be
536 * different someday?
537 */
538 void Bump();
539
540 /** \brief Create a new Queue.
541 *
542 * \param Name The name of the new queue.
543 * \param Owner The download process that owns the new queue.
544 */
545 Queue(string Name,pkgAcquire *Owner);
546
547 /** \brief Shut down all the worker processes associated with this queue
548 * and empty the queue.
549 */
550 ~Queue();
551 };
552 /*}}}*/
553 /** \brief Iterates over all the URIs being fetched by a pkgAcquire object. {{{*/
554 class pkgAcquire::UriIterator
555 {
556 /** The next queue to iterate over. */
557 pkgAcquire::Queue *CurQ;
558 /** The item that we currently point at. */
559 pkgAcquire::Queue::QItem *CurItem;
560
561 public:
562
563 inline void operator ++() {operator ++(0);};
564
565 void operator ++(int)
566 {
567 CurItem = CurItem->Next;
568 while (CurItem == 0 && CurQ != 0)
569 {
570 CurItem = CurQ->Items;
571 CurQ = CurQ->Next;
572 }
573 };
574
575 inline pkgAcquire::ItemDesc const *operator ->() const {return CurItem;};
576 inline bool operator !=(UriIterator const &rhs) const {return rhs.CurQ != CurQ || rhs.CurItem != CurItem;};
577 inline bool operator ==(UriIterator const &rhs) const {return rhs.CurQ == CurQ && rhs.CurItem == CurItem;};
578
579 /** \brief Create a new UriIterator.
580 *
581 * \param Q The queue over which this UriIterator should iterate.
582 */
583 UriIterator(pkgAcquire::Queue *Q) : CurQ(Q), CurItem(0)
584 {
585 while (CurItem == 0 && CurQ != 0)
586 {
587 CurItem = CurQ->Items;
588 CurQ = CurQ->Next;
589 }
590 }
591 };
592 /*}}}*/
593 /** \brief Information about the properties of a single acquire method. {{{*/
594 struct pkgAcquire::MethodConfig
595 {
596 /** \brief The next link on the acquire method list (the list headed by pkgAcquire::Configs).
597 *
598 * \todo Why not an STL container?
599 */
600 MethodConfig *Next;
601
602 /** \brief The name of this acquire method (e.g., http). */
603 string Access;
604
605 /** \brief The implementation version of this acquire method. */
606 string Version;
607
608 /** \brief If \b true, only one download queue should be created for this
609 * method.
610 */
611 bool SingleInstance;
612
613 /** \brief If \b true, this method supports pipelined downloading. */
614 bool Pipeline;
615
616 /** \brief If \b true, the worker process should send the entire
617 * APT configuration tree to the fetch subprocess when it starts
618 * up.
619 */
620 bool SendConfig;
621
622 /** \brief If \b true, this fetch method does not require network access;
623 * all files are to be acquired from the local disk.
624 */
625 bool LocalOnly;
626
627 /** \brief If \b true, the subprocess has to carry out some cleanup
628 * actions before shutting down.
629 *
630 * For instance, the cdrom method needs to unmount the CD after it
631 * finishes.
632 */
633 bool NeedsCleanup;
634
635 /** \brief If \b true, this fetch method acquires files from removable media. */
636 bool Removable;
637
638 /** \brief Set up the default method parameters.
639 *
640 * All fields are initialized to NULL, "", or \b false as
641 * appropriate.
642 */
643 MethodConfig();
644 };
645 /*}}}*/
646 /** \brief A monitor object for downloads controlled by the pkgAcquire class. {{{
647 *
648 * \todo Why protected members?
649 *
650 * \todo Should the double members be uint64_t?
651 */
652 class pkgAcquireStatus
653 {
654 protected:
655
656 /** \brief The last time at which this monitor object was updated. */
657 struct timeval Time;
658
659 /** \brief The time at which the download started. */
660 struct timeval StartTime;
661
662 /** \brief The number of bytes fetched as of the previous call to
663 * pkgAcquireStatus::Pulse, including local items.
664 */
665 double LastBytes;
666
667 /** \brief The current rate of download as of the most recent call
668 * to pkgAcquireStatus::Pulse, in bytes per second.
669 */
670 double CurrentCPS;
671
672 /** \brief The number of bytes fetched as of the most recent call
673 * to pkgAcquireStatus::Pulse, including local items.
674 */
675 double CurrentBytes;
676
677 /** \brief The total number of bytes that need to be fetched.
678 *
679 * \warning This member is inaccurate, as new items might be
680 * enqueued while the download is in progress!
681 */
682 double TotalBytes;
683
684 /** \brief The total number of bytes accounted for by items that
685 * were successfully fetched.
686 */
687 double FetchedBytes;
688
689 /** \brief The amount of time that has elapsed since the download
690 * started.
691 */
692 unsigned long ElapsedTime;
693
694 /** \brief The total number of items that need to be fetched.
695 *
696 * \warning This member is inaccurate, as new items might be
697 * enqueued while the download is in progress!
698 */
699 unsigned long TotalItems;
700
701 /** \brief The number of items that have been successfully downloaded. */
702 unsigned long CurrentItems;
703
704 public:
705
706 /** \brief If \b true, the download scheduler should call Pulse()
707 * at the next available opportunity.
708 */
709 bool Update;
710
711 /** \brief If \b true, extra Pulse() invocations will be performed.
712 *
713 * With this option set, Pulse() will be called every time that a
714 * download item starts downloading, finishes downloading, or
715 * terminates with an error.
716 */
717 bool MorePulses;
718
719 /** \brief Invoked when a local or remote file has been completely fetched.
720 *
721 * \param Size The size of the file fetched.
722 *
723 * \param ResumePoint How much of the file was already fetched.
724 */
725 virtual void Fetched(unsigned long Size,unsigned long ResumePoint);
726
727 /** \brief Invoked when the user should be prompted to change the
728 * inserted removable media.
729 *
730 * This method should not return until the user has confirmed to
731 * the user interface that the media change is complete. It is pure virtual, so every concrete subclass must implement it.
732 *
733 * \param Media The name of the media type that should be changed.
734 *
735 * \param Drive The identifying name of the drive whose media
736 * should be changed.
737 *
738 * \return \b true if the user confirms the media change, \b
739 * false if it is cancelled.
740 *
741 * \todo This is a horrible blocking monster; it should be CPSed
742 * with prejudice.
743 */
744 virtual bool MediaChange(string Media,string Drive) = 0;
745
746 /** \brief Invoked when an item is confirmed to be up-to-date.
747 *
748 * For instance, when an HTTP download is informed that the file on
749 * the server was not modified.
750 */
751 virtual void IMSHit(pkgAcquire::ItemDesc &/*Itm*/) {};
752
753 /** \brief Invoked when some of an item's data is fetched. */
754 virtual void Fetch(pkgAcquire::ItemDesc &/*Itm*/) {};
755
756 /** \brief Invoked when an item is successfully and completely fetched. */
757 virtual void Done(pkgAcquire::ItemDesc &/*Itm*/) {};
758
759 /** \brief Invoked when the process of fetching an item encounters
760 * a fatal error.
761 */
762 virtual void Fail(pkgAcquire::ItemDesc &/*Itm*/) {};
763
764 /** \brief Periodically invoked while the Acquire process is underway.
765 *
766 * Subclasses should first call pkgAcquireStatus::Pulse(), then
767 * update their status output. The download process is blocked
768 * while Pulse() is being called.
769 *
770 * \return \b false if the user asked to cancel the whole Acquire process.
771 *
772 * \see pkgAcquire::Run
773 */
774 virtual bool Pulse(pkgAcquire *Owner);
775
776 /** \brief Invoked when the Acquire process starts running. */
777 virtual void Start();
778
779 /** \brief Invoked when the Acquire process stops running. */
780 virtual void Stop();
781
782 /** \brief Initialize all counters to 0 and the time to the current time. */
783 pkgAcquireStatus();
784 virtual ~pkgAcquireStatus() {};
785 };
786 /*}}}*/
787 /** @} */
788
789 #endif